core_cm4_simd.h
1 /**************************************************************************//** 2 * @file core_cm4_simd.h 3 * @brief CMSIS Cortex-M4 SIMD Header File 4 * @version V3.01 5 * @date 06. March 2012 6 * 7 * @note 8 * Copyright (C) 2010-2012 ARM Limited. All rights reserved. 9 * 10 * @par 11 * ARM Limited (ARM) is supplying this software for use with Cortex-M 12 * processor based microcontrollers. This file can be freely distributed 13 * within development tools that are supporting such ARM based processors. 14 * 15 * @par 16 * THIS SOFTWARE IS PROVIDED "AS IS". NO WARRANTIES, WHETHER EXPRESS, IMPLIED 17 * OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF 18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE APPLY TO THIS SOFTWARE. 19 * ARM SHALL NOT, IN ANY CIRCUMSTANCES, BE LIABLE FOR SPECIAL, INCIDENTAL, OR 20 * CONSEQUENTIAL DAMAGES, FOR ANY REASON WHATSOEVER. 21 * 22 ******************************************************************************/ 23 24 #ifdef __cplusplus 25 extern "C" { 26 #endif 27 28 #ifndef __CORE_CM4_SIMD_H 29 #define __CORE_CM4_SIMD_H 30 31 32 /******************************************************************************* 33 * Hardware Abstraction Layer 34 ******************************************************************************/ 35 36 37 /* ################### Compiler specific Intrinsics ########################### */ 38 /** \defgroup CMSIS_SIMD_intrinsics CMSIS SIMD Intrinsics 39 Access to dedicated SIMD instructions 40 @{ 41 */ 42 43 44 #if defined ( __GNUC__ ) /*------------------ GNU Compiler ---------------------*/ 45 /* GNU gcc specific functions */ 46 47 /*------ CM4 SIMD Intrinsics -----------------------------------------------------*/ 48 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD8(uint32_t op1, uint32_t op2) 49 { 50 uint32_t result; 51 52 __ASM volatile ("sadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 53 return(result); 54 } 55 56 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD8(uint32_t op1, uint32_t op2) 57 { 58 uint32_t result; 59 60 __ASM volatile ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 61 return(result); 62 } 63 64 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD8(uint32_t op1, uint32_t op2) 65 { 66 uint32_t result; 67 68 __ASM volatile ("shadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 69 return(result); 70 } 71 72 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD8(uint32_t op1, uint32_t op2) 73 { 74 uint32_t result; 75 76 __ASM volatile ("uadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 77 return(result); 78 } 79 80 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD8(uint32_t op1, uint32_t op2) 81 { 82 uint32_t result; 83 84 __ASM volatile ("uqadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 85 return(result); 86 } 87 88 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD8(uint32_t op1, uint32_t op2) 89 { 90 uint32_t result; 91 92 __ASM volatile ("uhadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 93 return(result); 94 } 95 96 97 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB8(uint32_t op1, uint32_t op2) 98 { 99 uint32_t result; 100 101 __ASM volatile ("ssub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 102 return(result); 103 } 104 105 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2) 106 { 107 uint32_t result; 108 109 __ASM volatile ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 110 return(result); 111 } 112 113 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB8(uint32_t op1, uint32_t op2) 114 { 115 uint32_t result; 116 117 __ASM volatile ("shsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 118 return(result); 119 } 120 121 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB8(uint32_t op1, uint32_t op2) 122 { 123 uint32_t result; 124 125 __ASM volatile ("usub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 126 return(result); 127 } 128 129 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB8(uint32_t op1, uint32_t op2) 130 { 131 uint32_t result; 132 133 __ASM volatile ("uqsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 134 return(result); 135 } 136 137 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB8(uint32_t op1, uint32_t op2) 138 { 139 uint32_t result; 140 141 __ASM volatile ("uhsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 142 return(result); 143 } 144 145 146 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD16(uint32_t op1, uint32_t op2) 147 { 148 uint32_t result; 149 150 __ASM volatile ("sadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 151 return(result); 152 } 153 154 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD16(uint32_t op1, uint32_t op2) 155 { 156 uint32_t result; 157 158 __ASM volatile ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 159 return(result); 160 } 161 162 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2) 163 { 164 uint32_t result; 165 166 __ASM volatile ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 167 return(result); 168 } 169 170 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD16(uint32_t op1, uint32_t op2) 171 { 172 uint32_t result; 173 174 __ASM volatile ("uadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 175 return(result); 176 } 177 178 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD16(uint32_t op1, uint32_t op2) 179 { 180 uint32_t result; 181 182 __ASM volatile ("uqadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 183 return(result); 184 } 185 186 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD16(uint32_t op1, uint32_t op2) 187 { 188 uint32_t result; 189 190 __ASM volatile ("uhadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 191 return(result); 192 } 193 194 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB16(uint32_t op1, uint32_t op2) 195 { 196 uint32_t result; 197 198 __ASM volatile ("ssub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 199 return(result); 200 } 201 202 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2) 203 { 204 uint32_t result; 205 206 __ASM volatile ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 207 return(result); 208 } 209 210 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2) 211 { 212 uint32_t result; 213 214 __ASM volatile ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 215 return(result); 216 } 217 218 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB16(uint32_t op1, uint32_t op2) 219 { 220 uint32_t result; 221 222 __ASM volatile ("usub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 223 return(result); 224 } 225 226 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB16(uint32_t op1, uint32_t op2) 227 { 228 uint32_t result; 229 230 __ASM volatile ("uqsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 231 return(result); 232 } 233 234 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB16(uint32_t op1, uint32_t op2) 235 { 236 uint32_t result; 237 238 __ASM volatile ("uhsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 239 return(result); 240 } 241 242 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SASX(uint32_t op1, uint32_t op2) 243 { 244 uint32_t result; 245 246 __ASM volatile ("sasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 247 return(result); 248 } 249 250 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QASX(uint32_t op1, uint32_t op2) 251 { 252 uint32_t result; 253 254 __ASM volatile ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 255 return(result); 256 } 257 258 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHASX(uint32_t op1, uint32_t op2) 259 { 260 uint32_t result; 261 262 __ASM volatile ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 263 return(result); 264 } 265 266 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UASX(uint32_t op1, uint32_t op2) 267 { 268 uint32_t result; 269 270 __ASM volatile ("uasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 271 return(result); 272 } 273 274 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQASX(uint32_t op1, uint32_t op2) 275 { 276 uint32_t result; 277 278 __ASM volatile ("uqasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 279 return(result); 280 } 281 282 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHASX(uint32_t op1, uint32_t op2) 283 { 284 uint32_t result; 285 286 __ASM volatile ("uhasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 287 return(result); 288 } 289 290 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSAX(uint32_t op1, uint32_t op2) 291 { 292 uint32_t result; 293 294 __ASM volatile ("ssax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 295 return(result); 296 } 297 298 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSAX(uint32_t op1, uint32_t op2) 299 { 300 uint32_t result; 301 302 __ASM volatile ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 303 return(result); 304 } 305 306 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2) 307 { 308 uint32_t result; 309 310 __ASM volatile ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 311 return(result); 312 } 313 314 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAX(uint32_t op1, uint32_t op2) 315 { 316 uint32_t result; 317 318 __ASM volatile ("usax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 319 return(result); 320 } 321 322 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSAX(uint32_t op1, uint32_t op2) 323 { 324 uint32_t result; 325 326 __ASM volatile ("uqsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 327 return(result); 328 } 329 330 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSAX(uint32_t op1, uint32_t op2) 331 { 332 uint32_t result; 333 334 __ASM volatile ("uhsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 335 return(result); 336 } 337 338 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAD8(uint32_t op1, uint32_t op2) 339 { 340 uint32_t result; 341 342 __ASM volatile ("usad8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 343 return(result); 344 } 345 346 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USADA8(uint32_t op1, uint32_t op2, uint32_t op3) 347 { 348 uint32_t result; 349 350 __ASM volatile ("usada8 %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 351 return(result); 352 } 353 354 #define __SSAT16(ARG1,ARG2) \ 355 ({ \ 356 uint32_t __RES, __ARG1 = (ARG1); \ 357 __ASM ("ssat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ 358 __RES; \ 359 }) 360 361 #define __USAT16(ARG1,ARG2) \ 362 ({ \ 363 uint32_t __RES, __ARG1 = (ARG1); \ 364 __ASM ("usat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ 365 __RES; \ 366 }) 367 368 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTB16(uint32_t op1) 369 { 370 uint32_t result; 371 372 __ASM volatile ("uxtb16 %0, %1" : "=r" (result) : "r" (op1)); 373 return(result); 374 } 375 376 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTAB16(uint32_t op1, uint32_t op2) 377 { 378 uint32_t result; 379 380 __ASM volatile ("uxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 381 return(result); 382 } 383 384 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTB16(uint32_t op1) 385 { 386 uint32_t result; 387 388 __ASM volatile ("sxtb16 %0, %1" : "=r" (result) : "r" (op1)); 389 return(result); 390 } 391 392 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2) 393 { 394 uint32_t result; 395 396 __ASM volatile ("sxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 397 return(result); 398 } 399 400 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2) 401 { 402 uint32_t result; 403 404 __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 405 return(result); 406 } 407 408 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2) 409 { 410 uint32_t result; 411 412 __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 413 return(result); 414 } 415 416 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3) 417 { 418 uint32_t result; 419 420 __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 421 return(result); 422 } 423 424 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3) 425 { 426 uint32_t result; 427 428 __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 429 return(result); 430 } 431 432 #define __SMLALD(ARG1,ARG2,ARG3) \ 433 ({ \ 434 uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \ 435 __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \ 436 (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \ 437 }) 438 439 #define __SMLALDX(ARG1,ARG2,ARG3) \ 440 ({ \ 441 uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \ 442 __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \ 443 (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \ 444 }) 445 446 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2) 447 { 448 uint32_t result; 449 450 __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 451 return(result); 452 } 453 454 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2) 455 { 456 uint32_t result; 457 458 __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 459 return(result); 460 } 461 462 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSD (uint32_t op1, uint32_t op2, uint32_t op3) 463 { 464 uint32_t result; 465 466 __ASM volatile ("smlsd %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 467 return(result); 468 } 469 470 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3) 471 { 472 uint32_t result; 473 474 __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 475 return(result); 476 } 477 478 #define __SMLSLD(ARG1,ARG2,ARG3) \ 479 ({ \ 480 uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((ARG3) >> 32), __ARG3_L = (uint32_t)((ARG3) & 0xFFFFFFFFUL); \ 481 __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \ 482 (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \ 483 }) 484 485 #define __SMLSLDX(ARG1,ARG2,ARG3) \ 486 ({ \ 487 uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((ARG3) >> 32), __ARG3_L = (uint32_t)((ARG3) & 0xFFFFFFFFUL); \ 488 __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \ 489 (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \ 490 }) 491 492 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SEL (uint32_t op1, uint32_t op2) 493 { 494 uint32_t result; 495 496 __ASM volatile ("sel %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 497 return(result); 498 } 499 500 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD(uint32_t op1, uint32_t op2) 501 { 502 uint32_t result; 503 504 __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 505 return(result); 506 } 507 508 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB(uint32_t op1, uint32_t op2) 509 { 510 uint32_t result; 511 512 __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 513 return(result); 514 } 515 516 #define __PKHBT(ARG1,ARG2,ARG3) \ 517 ({ \ 518 uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ 519 __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ 520 __RES; \ 521 }) 522 523 #define __PKHTB(ARG1,ARG2,ARG3) \ 524 ({ \ 525 uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ 526 if (ARG3 == 0) \ 527 __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \ 528 else \ 529 __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ 530 __RES; \ 531 }) 532 533 /*-- End CM4 SIMD Intrinsics -----------------------------------------------------*/ 534 535 #endif /* __GNUC__ */ 536 537 #endif /* __CORE_CM4_SIMD_H */ 538 539 #ifdef __cplusplus 540 } 541 #endif