// make_nm.cpp
1 #include <stdio.h> 2 #include "xbyak/xbyak.h" 3 #include "xbyak/xbyak_bin2hex.h" 4 #include <stdlib.h> 5 #include <string.h> 6 #include "cybozu/inttype.hpp" 7 #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) 8 9 using namespace Xbyak; 10 11 const int bitEnd = 64; 12 13 const uint64_t MMX = 1ULL << 0; 14 const uint64_t _XMM = 1ULL << 1; 15 const uint64_t _MEM = 1ULL << 2; 16 const uint64_t _REG32 = 1ULL << 3; 17 const uint64_t EAX = 1ULL << 4; 18 const uint64_t IMM32 = 1ULL << 5; 19 const uint64_t IMM8 = 1ULL << 6; 20 const uint64_t _REG8 = 1ULL << 7; 21 const uint64_t _REG16 = 1ULL << 8; 22 const uint64_t NEG8 = 1ULL << 9; 23 const uint64_t IMM16 = 1ULL << 10; 24 const uint64_t NEG16 = 1ULL << 11; 25 const uint64_t AX = 1ULL << 12; 26 const uint64_t AL = 1ULL << 13; 27 const uint64_t IMM_1 = 1ULL << 14; 28 const uint64_t MEM8 = 1ULL << 15; 29 const uint64_t MEM16 = 1ULL << 16; 30 const uint64_t MEM32 = 1ULL << 17; 31 const uint64_t ONE = 1ULL << 19; 32 const uint64_t CL = 1ULL << 20; 33 const uint64_t MEM_ONLY_DISP = 1ULL << 21; 34 const uint64_t NEG32 = 1ULL << 23; 35 const uint64_t _YMM = 1ULL << 24; 36 const uint64_t VM32X_32 = 1ULL << 39; 37 const uint64_t VM32X_64 = 1ULL << 40; 38 const uint64_t VM32Y_32 = 1ULL << 41; 39 const uint64_t VM32Y_64 = 1ULL << 42; 40 #ifdef XBYAK64 41 const uint64_t _MEMe = 1ULL << 25; 42 const uint64_t REG32_2 = 1ULL << 26; // r8d, ... 43 const uint64_t REG16_2 = 1ULL << 27; // r8w, ... 44 const uint64_t REG8_2 = 1ULL << 28; // r8b, ... 45 const uint64_t REG8_3 = 1ULL << 29; // spl, ... 46 const uint64_t _REG64 = 1ULL << 30; // rax, ... 47 const uint64_t _REG64_2 = 1ULL << 31; // r8, ... 
48 const uint64_t RAX = 1ULL << 32; 49 const uint64_t _XMM2 = 1ULL << 33; 50 const uint64_t _YMM2 = 1ULL << 34; 51 const uint64_t VM32X = VM32X_32 | VM32X_64; 52 const uint64_t VM32Y = VM32Y_32 | VM32Y_64; 53 #else 54 const uint64_t _MEMe = 0; 55 const uint64_t REG32_2 = 0; 56 const uint64_t REG16_2 = 0; 57 const uint64_t REG8_2 = 0; 58 const uint64_t REG8_3 = 0; 59 const uint64_t _REG64 = 0; 60 const uint64_t _REG64_2 = 0; 61 const uint64_t RAX = 0; 62 const uint64_t _XMM2 = 0; 63 const uint64_t _YMM2 = 0; 64 const uint64_t VM32X = VM32X_32; 65 const uint64_t VM32Y = VM32Y_32; 66 #endif 67 const uint64_t REG64 = _REG64 | _REG64_2 | RAX; 68 const uint64_t REG32 = _REG32 | REG32_2 | EAX; 69 const uint64_t REG16 = _REG16 | REG16_2 | AX; 70 const uint64_t REG32e = REG32 | REG64; 71 const uint64_t REG8 = _REG8 | REG8_2|AL; 72 const uint64_t MEM = _MEM | _MEMe; 73 const uint64_t MEM64 = 1ULL << 35; 74 const uint64_t ST0 = 1ULL << 36; 75 const uint64_t STi = 1ULL << 37; 76 const uint64_t IMM_2 = 1ULL << 38; 77 const uint64_t IMM = IMM_1 | IMM_2; 78 const uint64_t XMM = _XMM | _XMM2; 79 const uint64_t YMM = _YMM | _YMM2; 80 const uint64_t K = 1ULL << 43; 81 const uint64_t _ZMM = 1ULL << 44; 82 const uint64_t _ZMM2 = 1ULL << 45; 83 #ifdef XBYAK64 84 const uint64_t ZMM = _ZMM | _ZMM2; 85 const uint64_t _YMM3 = 1ULL << 46; 86 #else 87 const uint64_t ZMM = _ZMM; 88 const uint64_t _YMM3 = 0; 89 #endif 90 const uint64_t K2 = 1ULL << 47; 91 const uint64_t ZMM_SAE = 1ULL << 48; 92 const uint64_t ZMM_ER = 1ULL << 49; 93 #ifdef XBYAK64 94 const uint64_t _XMM3 = 1ULL << 50; 95 #endif 96 const uint64_t XMM_SAE = 1ULL << 51; 97 #ifdef XBYAK64 98 const uint64_t XMM_KZ = 1ULL << 52; 99 const uint64_t YMM_KZ = 1ULL << 53; 100 const uint64_t ZMM_KZ = 1ULL << 54; 101 #else 102 const uint64_t XMM_KZ = 0; 103 const uint64_t YMM_KZ = 0; 104 const uint64_t ZMM_KZ = 0; 105 #endif 106 const uint64_t MEM_K = 1ULL << 55; 107 const uint64_t M_1to2 = 1ULL << 56; 108 const uint64_t M_1to4 = 1ULL << 
57; 109 const uint64_t M_1to8 = 1ULL << 58; 110 const uint64_t M_1to16 = 1ULL << 59; 111 const uint64_t XMM_ER = 1ULL << 60; 112 const uint64_t M_xword = 1ULL << 61; 113 const uint64_t M_yword = 1ULL << 62; 114 const uint64_t MY_1to4 = 1ULL << 18; 115 const uint64_t BNDREG = 1ULL << 22; 116 117 const uint64_t NOPARA = 1ULL << (bitEnd - 1); 118 119 class Test { 120 Test(const Test&); 121 void operator=(const Test&); 122 const bool isXbyak_; 123 int funcNum_; 124 /* 125 and_, or_, xor_, not_ => and, or, xor, not 126 */ 127 std::string removeUnderScore(std::string s) const 128 { 129 if (!isXbyak_ && s[s.size() - 1] == '_') s.resize(s.size() - 1); 130 return s; 131 } 132 133 // check all op1, op2, op3 134 void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const 135 { 136 for (int i = 0; i < bitEnd; i++) { 137 if ((op1 & (1ULL << i)) == 0) continue; 138 for (int j = 0; j < bitEnd; j++) { 139 if ((op2 & (1ULL << j)) == 0) continue; 140 for (int k = 0; k < bitEnd; k++) { 141 if ((op3 & (1ULL << k)) == 0) continue; 142 for (int s = 0; s < bitEnd; s++) { 143 if ((op4 & (1ULL << s)) == 0) continue; 144 printf("%s ", nm.c_str()); 145 if (isXbyak_) printf("("); 146 if (!(op1 & NOPARA)) printf("%s", get(1ULL << i)); 147 if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j)); 148 if (!(op3 & NOPARA)) printf(", %s", get(1ULL << k)); 149 if (!(op4 & NOPARA)) printf(", %s", get(1ULL << s)); 150 if (isXbyak_) printf("); dump();"); 151 printf("\n"); 152 } 153 } 154 } 155 } 156 } 157 void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const 158 { 159 for (int i = 0; i < bitEnd; i++) { 160 if ((op & (1ULL << i)) == 0) continue; 161 printf("%s ", nm); 162 if (isXbyak_) printf("("); 163 if (!(op & NOPARA)) printf("%s", get(1ULL << i)); 164 printf(", %s", isXbyak_ ? 
xbyak : nasm); 165 if (isXbyak_) printf("); dump();"); 166 printf("\n"); 167 } 168 } 169 void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const 170 { 171 if (nasm == 0) nasm = xbyak; 172 for (int i = 0; i < bitEnd; i++) { 173 if ((op & (1ULL << i)) == 0) continue; 174 printf("%s ", nm); 175 if (isXbyak_) printf("("); 176 printf("%s ", isXbyak_ ? xbyak : nasm); 177 if (!(op & NOPARA)) printf(", %s", get(1ULL << i)); 178 if (isXbyak_) printf("); dump();"); 179 printf("\n"); 180 } 181 } 182 void put(const char *nm, const char *para1, uint64_t op2, const char *para3) const 183 { 184 for (int j = 0; j < bitEnd; j++) { 185 if ((op2 & (1ULL << j)) == 0) continue; 186 printf("%s ", nm); 187 if (isXbyak_) printf("("); 188 printf("%s", para1); 189 if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j)); 190 printf(", %s", para3); 191 if (isXbyak_) printf("); dump();"); 192 printf("\n"); 193 } 194 } 195 const char *get(uint64_t type) const 196 { 197 int idx = (rand() / 31) & 7; 198 if (type == ST0) { 199 return "st0"; 200 } 201 if (type == STi) { 202 return "st2"; 203 } 204 switch (type) { 205 case MMX: 206 { 207 static const char MmxTbl[][4] = { 208 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" 209 }; 210 return MmxTbl[idx]; 211 } 212 case _XMM: 213 { 214 static const char tbl[][6] = { 215 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 216 }; 217 return tbl[idx]; 218 } 219 case _YMM: 220 { 221 static const char tbl[][6] = { 222 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7" 223 }; 224 return tbl[idx]; 225 } 226 case _ZMM: 227 { 228 static const char tbl[][6] = { 229 "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7" 230 }; 231 return tbl[idx]; 232 } 233 #ifdef XBYAK64 234 case _XMM2: 235 { 236 static const char tbl[][6] = { 237 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" 238 }; 239 return tbl[idx]; 240 } 241 case _XMM3: 242 { 243 static const char tbl[][6] = { 
244 "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23" 245 }; 246 return tbl[idx]; 247 } 248 case _YMM2: 249 { 250 static const char tbl[][6] = { 251 "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", 252 }; 253 return tbl[idx]; 254 } 255 case _YMM3: 256 { 257 static const char tbl[][6] = { 258 "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", 259 }; 260 return tbl[idx]; 261 } 262 case _ZMM2: 263 { 264 static const char tbl[][6] = { 265 "zmm8", "zmm9", "zmm10", "zmm11", "zmm28", "zmm29", "zmm30", "zmm31", 266 }; 267 return tbl[idx]; 268 } 269 #endif 270 case _MEM: 271 { 272 return isXbyak_ ? "ptr[eax+ecx+3]" : "[eax+ecx+3]"; // QQQ : disp8N 273 /* 274 idx %= 5; 275 switch (idx) { 276 case 0: return isXbyak_ ? "ptr[eax+ecx]" : "[eax+ecx]"; 277 case 1: return isXbyak_ ? "ptr[eax+ecx+1]" : "[eax+ecx+1]"; 278 case 2: return isXbyak_ ? "ptr[eax+ecx+16]" : "[eax+ecx+16]"; 279 case 3: return isXbyak_ ? "ptr[eax+ecx+32]" : "[eax+ecx+32]"; 280 case 4: return isXbyak_ ? "ptr[eax+ecx+48]" : "[eax+ecx+48]"; 281 } 282 */ 283 } 284 case _MEMe: 285 { 286 static int ccc = 1; 287 #ifdef USE_YASM 288 ccc++; 289 #endif 290 if (ccc & 1) { 291 return isXbyak_ ? "ptr[rdx+r15+0x12]" : "[rdx+r15+0x12]"; 292 } else { 293 return isXbyak_ ? "ptr[rip - 0x13456+1-3]" : "[rip - 0x13456+1-3]"; 294 } 295 } 296 case MEM8: 297 return "byte [eax+edx]"; 298 case MEM16: 299 return "word [esi]"; 300 case MEM32: 301 return "dword [ebp*2]"; 302 case MEM64: 303 return "qword [eax+ecx*8]"; 304 case MEM_ONLY_DISP: 305 return isXbyak_ ? 
"ptr[(void*)0x123]" : "[0x123]"; 306 case _REG16: // not ax 307 { 308 static const char Reg16Tbl[][4] = { 309 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" 310 }; 311 return Reg16Tbl[(idx % 7) + 1]; 312 } 313 case _REG8: // not al 314 { 315 static const char Reg8Tbl[][4] = { 316 #ifdef XBYAK64 // QQQ 317 "al", "cl", "dl", "bl", "al", "cl", "dl", "bl" 318 #else 319 "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" 320 #endif 321 }; 322 return Reg8Tbl[(idx % 7) + 1]; 323 } 324 case _REG32: // not eax 325 { 326 static const char Reg32Tbl[][4] = { 327 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" 328 }; 329 return Reg32Tbl[(idx % 7) + 1]; 330 } 331 #ifdef XBYAK64 332 case _REG64: // not rax 333 { 334 static const char Reg64Tbl[][4] = { 335 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi" 336 }; 337 return Reg64Tbl[(idx % 7) + 1]; 338 } 339 case _REG64_2: 340 { 341 static const char Reg64_2Tbl[][4] = { 342 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" 343 }; 344 return Reg64_2Tbl[idx]; 345 } 346 case REG32_2: 347 { 348 static const char Reg32eTbl[][5] = { 349 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" 350 }; 351 return Reg32eTbl[idx]; 352 } 353 case REG16_2: 354 { 355 static const char Reg16eTbl[][5] = { 356 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" 357 }; 358 return Reg16eTbl[idx]; 359 } 360 case REG8_2: 361 { 362 static const char Reg8_2Tbl[][5] = { 363 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" 364 }; 365 return Reg8_2Tbl[idx]; 366 } 367 case REG8_3: 368 { 369 static const char Reg8_3Tbl[][5] = { 370 "spl", "bpl", "sil", "dil", "spl", "bpl", "sil", "dil" 371 }; 372 return Reg8_3Tbl[idx]; 373 } 374 case RAX: 375 return "rax"; 376 #endif 377 case EAX: 378 return "eax"; 379 case AX: 380 return "ax"; 381 case AL: 382 return "al"; 383 case CL: 384 return "cl"; 385 case ONE: 386 return "1"; 387 case IMM32: 388 return isXbyak_ ? "12345678" : "dword 12345678"; 389 case IMM16: 390 return isXbyak_ ? 
"1000" : "word 1000"; 391 case IMM8: 392 return isXbyak_ ? "4" : "byte 4"; 393 case NEG8: 394 return isXbyak_ ? "-30" : "byte -30"; 395 case NEG16: 396 return isXbyak_ ? "-1000" : "word -1000"; 397 case NEG32: 398 return isXbyak_ ? "-100000" : "dword -100000"; 399 case IMM_1: 400 return "4"; 401 case IMM_2: 402 return isXbyak_ ? "0xda" : "0xda"; 403 case VM32X_32: 404 return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]"; 405 case VM32X_64: 406 return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]"; 407 case VM32Y_32: 408 return isXbyak_ ? "ptr [ymm4]" : "[ymm4]"; 409 case VM32Y_64: 410 return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]"; 411 case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}"; 412 case M_1to4: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to4}"; 413 case M_1to8: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to8}"; 414 case M_1to16: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to16}"; 415 416 case M_xword: return isXbyak_ ? "ptr [eax+33]" : "oword [eax+33]"; 417 case M_yword: return isXbyak_ ? "yword [eax+33]" : "yword [eax+33]"; 418 case MY_1to4: return isXbyak_ ? "yword_b [eax+32]" : "[eax+32]{1to4}"; 419 case K: 420 { 421 static const char kTbl[][5] = { 422 "k1", "k2", "k3", "k4", "k5", "k6", "k7", 423 }; 424 return kTbl[idx % 7]; 425 } 426 case K2: 427 return isXbyak_ ? "k3 | k5" : "k3{k5}"; 428 case BNDREG: 429 { 430 static const char tbl[][5] = { 431 "bnd0", "bnd1", "bnd2", "bnd3", 432 }; 433 return tbl[idx % 4]; 434 } 435 #ifdef XBYAK64 436 case XMM_SAE: 437 return isXbyak_ ? "xmm25 | T_sae" : "xmm25, {sae}"; 438 case ZMM_SAE: 439 return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}"; 440 case XMM_ER: 441 return isXbyak_ ? "xmm4 | T_rd_sae" : "xmm4, {rd-sae}"; 442 case ZMM_ER: 443 return isXbyak_ ? "zmm20 | T_rd_sae" : "zmm20, {rd-sae}"; 444 case XMM_KZ: 445 return isXbyak_ ? "xmm5 | k5" : "xmm5{k5}"; 446 case YMM_KZ: 447 return isXbyak_ ? 
"ymm2 |k3|T_z" : "ymm2{k3}{z}"; 448 case ZMM_KZ: 449 return isXbyak_ ? "zmm7|k1" : "zmm7{k1}"; 450 case MEM_K: 451 return isXbyak_ ? "ptr [rax] | k1" : "[rax]{k1}"; 452 #else 453 case XMM_SAE: 454 return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}"; 455 case ZMM_SAE: 456 return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}"; 457 case XMM_ER: 458 return isXbyak_ ? "xmm30 | T_rd_sae" : "xmm30, {rd-sae}"; 459 case ZMM_ER: 460 return isXbyak_ ? "zmm2 | T_rd_sae" : "zmm2, {rd-sae}"; 461 case MEM_K: 462 return isXbyak_ ? "ptr [eax] | k1" : "[eax]{k1}"; 463 #endif 464 } 465 return 0; 466 } 467 void putSIMPLE() const 468 { 469 const char tbl[][20] = { 470 #ifdef XBYAK64 471 "cdqe", 472 "cqo", 473 "cmpsq", 474 "movsq", 475 "popfq", 476 "pushfq", 477 "lodsq", 478 "movsq", 479 "scasq", 480 "stosq", 481 "syscall", 482 "sysret", 483 #else 484 "aaa", 485 "aad", 486 "aam", 487 "aas", 488 "daa", 489 "das", 490 "into", 491 "popad", 492 "popfd", 493 "pusha", 494 "pushad", 495 "pushfd", 496 "popa", 497 #endif 498 499 "cbw", 500 "cdq", 501 "clc", 502 "cld", 503 "cli", 504 "cmc", 505 506 "cpuid", 507 "cwd", 508 "cwde", 509 510 "lahf", 511 // "lock", 512 "cmpsb", 513 "cmpsw", 514 "cmpsd", 515 "hlt", 516 "int3", 517 "leave", 518 "lodsb", 519 "lodsw", 520 "lodsd", 521 "movsb", 522 "movsw", 523 "movsd", 524 "outsb", 525 "outsw", 526 "outsd", 527 "scasb", 528 "scasw", 529 "scasd", 530 "stosb", 531 "stosw", 532 "stosd", 533 "nop", 534 535 "sahf", 536 "serialize", 537 "stc", 538 "std", 539 "sti", 540 "sysenter", 541 "sysexit", 542 543 "emms", 544 "pause", 545 "sfence", 546 "lfence", 547 "mfence", 548 "monitor", 549 "mwait", 550 551 "rdmsr", 552 "rdpmc", 553 "rdtsc", 554 "rdtscp", 555 "ud2", 556 "wait", 557 "fwait", 558 "wbinvd", 559 "wrmsr", 560 "xlatb", 561 "xend", 562 563 "popf", 564 "pushf", 565 "stac", 566 567 "xgetbv", 568 "vzeroall", 569 "vzeroupper", 570 571 "f2xm1", 572 "fabs", 573 "faddp", 574 "fchs", 575 "fclex", 576 "fnclex", 577 "fcom", 578 "fcomp", 579 "fcompp", 580 "fcos", 581 
"fdecstp", 582 "fdivp", 583 "fdivrp", 584 "fincstp", 585 "finit", 586 "fninit", 587 "fld1", 588 "fldl2t", 589 "fldl2e", 590 "fldpi", 591 "fldlg2", 592 "fldln2", 593 "fldz", 594 "fmulp", 595 "fnop", 596 "fpatan", 597 "fprem", 598 "fprem1", 599 "fptan", 600 "frndint", 601 "fscale", 602 "fsin", 603 "fsincos", 604 "fsqrt", 605 "fsubp", 606 "fsubrp", 607 "ftst", 608 "fucom", 609 "fucomp", 610 "fucompp", 611 "fxam", 612 "fxch", 613 "fxtract", 614 "fyl2x", 615 "fyl2xp1", 616 617 "monitorx", 618 "mwaitx", 619 "clzero", 620 }; 621 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 622 put(tbl[i]); 623 } 624 { 625 const char memTbl[][16] = { 626 "clflush", 627 "clflushopt", 628 "fbld", 629 "fbstp", 630 "fldcw", 631 "fldenv", 632 "frstor", 633 "fsave", 634 "fnsave", 635 "fstcw", 636 "fnstcw", 637 "fstenv", 638 "fnstenv", 639 "fstsw", 640 "fnstsw", 641 "fxrstor", 642 "clwb", 643 }; 644 for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) { 645 put(memTbl[i], MEM); 646 } 647 put("fstsw", AX); 648 put("fnstsw", AX); 649 } 650 651 put("bswap", REG32e); 652 put("lea", REG32e|REG16, MEM); 653 put("enter", IMM, IMM); 654 put(isXbyak_ ? "int_" : "int", IMM8); 655 put(isXbyak_ ? "in_" : "in", AL|AX|EAX, IMM8); 656 puts(isXbyak_ ? "in_(al, dx); dump();" : "in al, dx"); 657 puts(isXbyak_ ? "in_(ax, dx); dump();" : "in ax, dx"); 658 puts(isXbyak_ ? "in_(eax, dx); dump();" : "in eax, dx"); 659 put(isXbyak_ ? "out_" : "out", IMM8, AL|AX|EAX); 660 puts(isXbyak_ ? "out_(dx, al); dump();" : "out dx, al"); 661 puts(isXbyak_ ? "out_(dx, ax); dump();" : "out dx, ax"); 662 puts(isXbyak_ ? "out_(dx, eax); dump();" : "out dx, eax"); 663 puts(isXbyak_ ? 
"lea(eax, ptr[edi + 4 * eax]); dump();" : "lea eax, [edi + 4 * eax]"); 664 } 665 void putJmp() const 666 { 667 #ifdef XBYAK64 668 put("jmp", REG64); 669 put("call", REG64); 670 #else 671 put("jmp", REG32); 672 put("call", REG16|REG32); 673 #endif 674 put("jmp", MEM); 675 put("jmp", MEM); 676 put("jmp", MEM); 677 put("call", MEM|MEM_ONLY_DISP); 678 #ifndef USE_YASM 679 // call(ptr [getCode() + 5]); means to construct the opecode of "call" 680 // after calling getCode(). 681 // Its behavior is same as NASM(MASM). YASM makes different opecode. 682 put("call", "getCode() + 5", "$ + 5"); 683 #endif 684 685 #ifdef XBYAK64 686 put("jmp", "ptr[(void*)0x12345678]", "[0x12345678]"); 687 put("call", "ptr[(void*)0x12345678]", "[0x12345678]"); 688 #ifdef USE_YASM 689 put("jmp", "ptr[rip + 0x12345678]", "[rip+0x12345678]"); 690 put("call", "ptr[rip + 0x12345678]", "[rip+0x12345678]"); 691 put("call", "ptr[rip -23]", "[rip-23]"); 692 put("call", "ptr[rip -23+56]", "[rip-23+56]"); 693 #else 694 // bug of yasm? 
695 if (isXbyak_) { 696 puts("{ Label label0;"); 697 puts("L(label0);"); 698 puts("pshufb (xmm14, ptr [rip+label0]); dump();"); 699 puts("}"); 700 } else { 701 puts("label0:"); 702 puts("pshufb xmm14, [rel label0]"); 703 } 704 #endif 705 #endif 706 } 707 void putFarJmp() const 708 { 709 #ifdef XBYAK64 710 put("jmp", "word[rax],T_FAR", "far word [rax]"); 711 put("jmp", "dword[rax],T_FAR", "far dword [rax]"); 712 put("jmp", "qword[rax],T_FAR", "far qword [rax]"); 713 714 put("call", "word[rax],T_FAR", "far word [rax]"); 715 put("call", "dword[rax],T_FAR", "far dword [rax]"); 716 put("call", "qword[rax],T_FAR", "far qword [rax]"); 717 #else 718 put("jmp", "dword[eax],T_FAR", "far dword [eax]"); 719 put("jmp", "word[eax],T_FAR", "far word [eax]"); 720 721 put("call", "dword[eax],T_FAR", "far dword [eax]"); 722 put("call", "word[eax],T_FAR", "far word [eax]"); 723 #endif 724 } 725 void putMMX1() const 726 { 727 // emms etc 728 put("ldmxcsr", MEM); 729 put("movmskps", REG32e, XMM); 730 put("movmskpd", REG32e, XMM); 731 put("stmxcsr", MEM); 732 put("maskmovq", MMX, MMX); 733 put("movntps", MEM, XMM); 734 put("movntq", MEM, MMX); 735 put("prefetcht0", MEM); 736 put("prefetcht1", MEM); 737 put("prefetcht2", MEM); 738 put("prefetchnta", MEM); 739 put("prefetchwt1", MEM); 740 put("prefetchw", MEM); 741 742 // SSE2 misc 743 put("maskmovdqu", XMM, XMM); 744 put("movntpd", MEM, XMM); 745 put("movntdq", MEM, XMM); 746 put("movnti", MEM, REG32); // QQQ:REG32e? 
747 748 put("movhlps", XMM, XMM); 749 put("movlhps", XMM, XMM); 750 751 // movd for MMX, XMM 752 put("movd", MEM|MEM32|REG32, MMX|XMM); 753 put("movd", MMX|XMM, MEM|REG32|MEM32); 754 755 // movq for MMX 756 put("movq", MMX, MMX|MEM); 757 put("movq", MEM, MMX); 758 // movq for XMM 759 put("movq", XMM, XMM|MEM); 760 put("movq", MEM, XMM); 761 put("movq", XMM|MMX, "qword[eax]", "qword[eax]"); 762 put("movq", XMM|MMX, "ptr[eax]", "qword[eax]"); 763 put("movq", "qword[eax]", "qword[eax]", XMM|MMX); 764 put("movq", "ptr[eax]", "qword[eax]", XMM|MMX); 765 #ifdef XBYAK64 766 put("movq", REG64, XMM|MMX); 767 put("movq", XMM|MMX, REG64); 768 #endif 769 770 // SSE3 int 771 put("lddqu", XMM, MEM); 772 } 773 void putMMX2() const 774 { 775 static const char nmTbl[][16] = { 776 // MMX 777 "packssdw", 778 "packsswb", 779 "packuswb", 780 "pand", 781 "pandn", 782 "pmaddwd", 783 "pmulhuw", 784 "pmulhw", 785 "pmullw", 786 "por", 787 "punpckhbw", 788 "punpckhwd", 789 "punpckhdq", 790 "punpcklbw", 791 "punpcklwd", 792 "punpckldq", 793 "pxor", 794 "paddb", 795 "paddw", 796 "paddd", 797 "paddsb", 798 "paddsw", 799 "paddusb", 800 "paddusw", 801 "pcmpeqb", 802 "pcmpeqw", 803 "pcmpeqd", 804 "pcmpgtb", 805 "pcmpgtw", 806 "pcmpgtd", 807 "psllw", 808 "pslld", 809 "psllq", 810 "psraw", 811 "psrad", 812 "psrlw", 813 "psrld", 814 "psrlq", 815 "psubb", 816 "psubw", 817 "psubd", 818 "psubsb", 819 "psubsw", 820 "psubusb", 821 "psubusw", 822 // MMX2 823 "pavgb", 824 "pavgw", 825 "pmaxsw", 826 "pmaxub", 827 "pminsw", 828 "pminub", 829 "psadbw", 830 // 831 "paddq", 832 "pmuludq", 833 "psubq", 834 }; 835 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) { 836 put(nmTbl[i], MMX, MMX|MEM); 837 put(nmTbl[i], XMM, XMM|MEM); 838 } 839 } 840 void putMMX3() const 841 { 842 static const char nmTbl[][16] = { 843 "psllw", 844 "pslld", 845 "psllq", 846 "psraw", 847 "psrad", 848 "psrlw", 849 "psrld", 850 "psrlq", 851 }; 852 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) { 853 put(nmTbl[i], MMX|XMM, IMM); 854 } 855 
put("pslldq", XMM, IMM); 856 put("psrldq", XMM, IMM); 857 put("pmovmskb", REG32, MMX|XMM); // QQQ 858 put("pextrw", REG32, MMX|XMM, IMM); // QQQ 859 put("pinsrw", MMX|XMM, REG32|MEM, IMM); // QQQ 860 } 861 void putMMX4() const 862 { 863 put("pshufw", MMX, MMX|MEM, IMM); 864 put("pshuflw", XMM, XMM|MEM, IMM); 865 put("pshufhw", XMM, XMM|MEM, IMM); 866 put("pshufd", XMM, XMM|MEM, IMM); 867 } 868 void putMMX5() const 869 { 870 static const char nmTbl[][16] = { 871 "movdqa", 872 "movdqu", 873 "movaps", 874 "movss", 875 "movups", 876 "movapd", 877 "movsd", 878 "movupd", 879 }; 880 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) { 881 put(nmTbl[i], XMM, XMM|MEM); 882 put(nmTbl[i], MEM, XMM); 883 } 884 put("movq2dq", XMM, MMX); 885 put("movdq2q", MMX, XMM); 886 } 887 888 void putXMM1() const 889 { 890 enum { 891 PS = 1 << 0, 892 SS = 1 << 1, 893 PD = 1 << 2, 894 SD = 1 << 3 895 }; 896 const struct { 897 uint8_t code; 898 const char *name; 899 } sufTbl[] = { 900 { 0, "ps" }, 901 { 0xF3, "ss" }, 902 { 0x66, "pd" }, 903 { 0xF2, "sd" }, 904 }; 905 static const struct XmmTbl1 { 906 uint8_t code; 907 int mode; 908 const char *name; 909 bool hasImm; 910 } xmmTbl1[] = { 911 { B01011000, PS|SS|PD|SD, "add", false }, 912 { B01010101, PS|PD , "andn", false }, 913 { B01010100, PS|PD , "and", false }, 914 { B11000010, PS|SS|PD|SD, "cmp", true }, 915 { B01011110, PS|SS|PD|SD, "div", false }, 916 { B01011111, PS|SS|PD|SD, "max", false }, 917 { B01011101, PS|SS|PD|SD, "min", false }, 918 { B01011001, PS|SS|PD|SD, "mul", false }, 919 { B01010110, PS|PD , "or", false }, 920 { B01010011, PS|SS , "rcp", false }, 921 { B01010010, PS|SS , "rsqrt", false }, 922 { B11000110, PS|PD , "shuf", true }, 923 { B01010001, PS|SS|PD|SD, "sqrt", false }, 924 { B01011100, PS|SS|PD|SD, "sub", false }, 925 { B00010101, PS|PD , "unpckh", false }, 926 { B00010100, PS|PD , "unpckl", false }, 927 { B01010111, PS|PD , "xor", false }, 928 // 929 }; 930 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl1); i++) { 931 
const XmmTbl1 *p = &xmmTbl1[i]; 932 for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { 933 if (!(p->mode & (1 << j))) continue; 934 char buf[16]; 935 snprintf(buf, sizeof(buf), "%s%s", p->name, sufTbl[j].name); 936 if (p->hasImm) { 937 put(buf, XMM, XMM|MEM, IMM); 938 } else { 939 put(buf, XMM, XMM|MEM); 940 } 941 } 942 } 943 } 944 void putXMM2() const 945 { 946 // (XMM, XMM|MEM) 947 static const char tbl[][16] = { 948 "punpckhqdq", 949 "punpcklqdq", 950 951 "comiss", 952 "ucomiss", 953 "comisd", 954 "ucomisd", 955 956 "cvtpd2ps", 957 "cvtps2pd", 958 "cvtsd2ss", 959 "cvtss2sd", 960 "cvtpd2dq", 961 "cvttpd2dq", 962 "cvtdq2pd", 963 "cvtps2dq", 964 "cvttps2dq", 965 "cvtdq2ps", 966 967 "addsubpd", 968 "addsubps", 969 "haddpd", 970 "haddps", 971 "hsubpd", 972 "hsubps", 973 "movddup", 974 "movshdup", 975 "movsldup", 976 }; 977 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 978 put(tbl[i], XMM, XMM|MEM); 979 } 980 } 981 void putXMM3() const 982 { 983 static const struct Tbl { 984 const char *name; 985 uint64_t op1; 986 uint64_t op2; 987 } tbl[] = { 988 { "cvtpi2ps", XMM, MMX|MEM }, 989 { "cvtps2pi", MMX, XMM|MEM }, 990 { "cvtsi2ss", XMM, REG32|MEM }, 991 { "cvtss2si", REG32, XMM|MEM }, 992 { "cvttps2pi", MMX, XMM|MEM }, 993 { "cvttss2si", REG32, XMM|MEM }, 994 { "cvtpi2pd", XMM, MMX|MEM }, 995 { "cvtpd2pi", MMX, XMM|MEM }, 996 { "cvtsi2sd", XMM, REG32|MEM }, 997 { "cvtsd2si", REG32, XMM|MEM }, 998 { "cvttpd2pi", MMX, XMM|MEM }, 999 { "cvttsd2si", REG32, XMM|MEM }, 1000 }; 1001 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1002 const Tbl *p = &tbl[i]; 1003 put(p->name, p->op1, p->op2); 1004 } 1005 } 1006 void putXMM4() const 1007 { 1008 static const char tbl[][16] = { 1009 "movhps", 1010 "movlps", 1011 "movhpd", 1012 "movlpd", 1013 }; 1014 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1015 const char *p = tbl[i]; 1016 put(p, XMM, MEM); 1017 put(p, MEM, XMM); 1018 } 1019 } 1020 void putCmov() const 1021 { 1022 const char tbl[][4] = { 1023 "o", 1024 "no", 1025 "b", 
1026 "c", 1027 "nae", 1028 "nb", 1029 "nc", 1030 "ae", 1031 "e", 1032 "z", 1033 "ne", 1034 "nz", 1035 "be", 1036 "na", 1037 "nbe", 1038 "a", 1039 "s", 1040 "ns", 1041 "p", 1042 "pe", 1043 "np", 1044 "po", 1045 "l", 1046 "nge", 1047 "nl", 1048 "ge", 1049 "le", 1050 "ng", 1051 "nle", 1052 "g", 1053 }; 1054 #if defined(__GNUC__) && !defined(__clang__) 1055 #pragma GCC diagnostic push 1056 #pragma GCC diagnostic ignored "-Wformat-truncation" // wrong detection 1057 #endif 1058 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1059 char buf[32]; 1060 snprintf(buf, sizeof(buf), "cmov%s", tbl[i]); 1061 put(buf, REG16, REG16|MEM); 1062 put(buf, REG32, REG32|MEM); 1063 put(buf, REG64, REG64|MEM); 1064 snprintf(buf, sizeof(buf), "set%s", tbl[i]); 1065 put(buf, REG8|REG8_3|MEM); 1066 } 1067 #if defined(__GNUC__) && !defined(__clang__) 1068 #pragma GCC diagnostic pop 1069 #endif 1070 } 1071 void putReg1() const 1072 { 1073 // (REG, REG|MEM) 1074 { 1075 static const char tbl[][16] = { 1076 "adc", 1077 "add", 1078 "and_", 1079 "cmp", 1080 "or_", 1081 "sbb", 1082 "sub", 1083 "xor_", 1084 }; 1085 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1086 const std::string s = removeUnderScore(tbl[i]); 1087 const char *p = s.c_str(); 1088 put(p, REG32, REG32|MEM); 1089 put(p, REG64, REG64|MEM); 1090 put(p, REG16, REG16|MEM); 1091 put(p, REG8|REG8_3, REG8|MEM); 1092 put(p, MEM, REG32e|REG16|REG8|REG8_3); 1093 1094 put(p, MEM8, IMM8|NEG8); 1095 put(p, MEM16, IMM8|IMM16|NEG8|NEG16); 1096 put(p, MEM32, IMM8|IMM32|NEG8|NEG32); 1097 1098 put(p, REG64|RAX, IMM8|NEG8); 1099 put(p, REG64|RAX, "0x12345678", "0x12345678"); 1100 put(p, REG64|RAX, "192", "192"); 1101 put(p, REG64|RAX, "0x1234", "0x1234"); 1102 put(p, REG32|EAX, IMM8|IMM32|NEG8); 1103 put(p, REG16|AX, IMM8|IMM16|NEG8|NEG16); 1104 put(p, REG8|REG8_3|AL, IMM|NEG8); 1105 } 1106 } 1107 { 1108 const char tbl[][8] = { 1109 "adcx", 1110 "adox", 1111 }; 1112 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1113 const char *p = tbl[i]; 1114 
put(p, REG32, REG32|MEM); 1115 put(p, REG64, REG64|MEM); 1116 } 1117 } 1118 } 1119 void putBt() const 1120 { 1121 static const char tbl[][16] = { 1122 "bt", 1123 "bts", 1124 "btr", 1125 "btc", 1126 }; 1127 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1128 const char *p = tbl[i]; 1129 put(p, MEM|REG16, REG16); 1130 put(p, MEM|REG32, REG32); 1131 put(p, MEM|REG64, REG64); 1132 put(p, MEM16|REG16, IMM); 1133 } 1134 } 1135 void putRorM() const 1136 { 1137 static const char tbl[][16] = { 1138 "inc", 1139 "dec", 1140 "div", 1141 "idiv", 1142 "imul", 1143 "mul", 1144 "neg", 1145 "not_", 1146 }; 1147 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1148 const std::string s = removeUnderScore(tbl[i]); 1149 const char *p = s.c_str(); 1150 put(p, REG32e|REG16|REG8|REG8_3); 1151 put(p, MEM32|MEM16|MEM8); 1152 } 1153 const char *p = "imul"; 1154 put(p, REG16, REG16|MEM16); 1155 put(p, REG32, REG32|MEM32); 1156 put(p, REG64, REG64|MEM); 1157 put(p, REG16, REG16|MEM, IMM8|IMM16); 1158 put(p, REG32, REG32|MEM, IMM8|IMM32); 1159 put(p, REG64, REG64|MEM, IMM8|IMM32); 1160 } 1161 void putPushPop() const 1162 { 1163 /* 1164 QQQ: 1165 push byte 2 1166 push dword 2 1167 reduce 4-byte stack 1168 push word 2 1169 reduce 2-byte stack, so I can't support it 1170 */ 1171 1172 put("push", IMM8|IMM32); 1173 if (isXbyak_) { 1174 puts("push(word, 1000);dump();"); 1175 } else { 1176 puts("push word 1000"); 1177 } 1178 1179 put("push", REG16|MEM16); 1180 put("pop", REG16|MEM16); 1181 #ifdef XBYAK64 1182 put("push", REG64|IMM32|MEM64); 1183 put("pop", REG64|MEM64); 1184 #else 1185 put("push", REG32|IMM32|MEM32); 1186 put("pop", REG32|MEM32); 1187 #endif 1188 } 1189 void putPushPop8_16() const 1190 { 1191 const struct { 1192 int b; 1193 uint32_t v; 1194 } tbl[] = { 1195 { 8, 0x7f }, 1196 { 8, 0x80 }, 1197 { 8, 0xff }, 1198 { 8, 0x100 }, 1199 { 8, 0x12345 }, 1200 { 16, 0x7fff }, 1201 { 16, 0x8000 }, 1202 { 16, 0xffff }, 1203 { 16, 0x10000 }, 1204 { 16, 0x12345 }, 1205 }; 1206 for (size_t i = 0; i 
< NUM_OF_ARRAY(tbl); i++) { 1207 const char *b = tbl[i].b == 8 ? "byte" : "word"; 1208 uint32_t v = tbl[i].v; 1209 if (isXbyak_) { 1210 printf("push(%s, 0x%x);dump();\n", b, v); 1211 } else { 1212 printf("push %s 0x%x\n", b, v); 1213 } 1214 } 1215 } 1216 void putTest() const 1217 { 1218 const char *p = "test"; 1219 put(p, REG32|MEM, REG32); 1220 put(p, REG64|MEM, REG64); 1221 put(p, REG16|MEM, REG16); 1222 put(p, REG8|REG8_3|MEM, REG8|REG8_3); 1223 put(p, REG32e|REG16|REG8|REG8_3|EAX|AX|AL|MEM32|MEM16|MEM8, IMM); 1224 } 1225 void putMov64() const 1226 { 1227 const struct { 1228 const char *a; 1229 const char *b; 1230 } tbl[] = { 1231 { "0", "0" }, 1232 { "0x123", "0x123" }, 1233 { "0x12345678", "0x12345678" }, 1234 { "0x7fffffff", "0x7fffffff" }, 1235 { "0xffffffff", "0xffffffff" }, 1236 { "0x80000000", "0x80000000" }, 1237 { "2147483648U", "2147483648" }, 1238 { "0x80000001", "0x80000001" }, 1239 { "0xffffffffffffffff", "0xffffffffffffffff" }, 1240 { "-1", "-1" }, 1241 { "0xffffffff80000000", "0xffffffff80000000" }, 1242 { "0xffffffff80000001", "0xffffffff80000001" }, 1243 { "0xffffffff12345678", "0xffffffff12345678" }, 1244 }; 1245 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1246 put("mov", REG64, tbl[i].a, tbl[i].b); 1247 } 1248 } 1249 void putLoadSeg() const 1250 { 1251 const struct Tbl { 1252 const char *name; 1253 bool support64Bit; 1254 } tbl[] = { 1255 #ifdef XBYAK32 1256 { "lds", false }, 1257 { "les", false }, 1258 #endif 1259 { "lss", true }, 1260 { "lfs", true }, 1261 { "lgs", true }, 1262 }; 1263 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1264 const Tbl *p = &tbl[i]; 1265 put(p->name, REG16|REG32, MEM); 1266 #ifdef XBYAK64 1267 if (p->support64Bit) { 1268 put(p->name, REG64, MEM); 1269 } 1270 #endif 1271 } 1272 } 1273 // only nasm 1274 void putMovImm64() const 1275 { 1276 put("mov", REG64, "0x1234567890abcdefLL", "0x1234567890abcdef"); 1277 put("mov", REG64, "0x12345678", "0x12345678"); 1278 put("mov", REG64, "0xffffffff12345678LL", 
"0xffffffff12345678"); 1279 put("mov", REG32e|REG16|REG8|RAX|EAX|AX|AL, IMM); 1280 1281 put("mov", EAX, "ptr[(void*)-1]", "[-1]"); 1282 put("mov", EAX, "ptr[(void*)0x7fffffff]", "[0x7fffffff]"); 1283 put("mov", EAX, "ptr[(void*)0xffffffffffffffff]", "[0xffffffffffffffff]"); 1284 } 1285 void putEtc() const 1286 { 1287 { 1288 const char *p = "ret"; 1289 put(p); 1290 put(p, IMM); 1291 p = "retf"; 1292 put(p); 1293 put(p, IMM); 1294 p = "mov"; 1295 put(p, EAX|REG32|MEM|MEM_ONLY_DISP, REG32|EAX); 1296 put(p, REG64|MEM|MEM_ONLY_DISP, REG64|RAX); 1297 put(p, AX|REG16|MEM|MEM_ONLY_DISP, REG16|AX); 1298 put(p, AL|REG8|REG8_3|MEM|MEM_ONLY_DISP, REG8|REG8_3|AL); 1299 put(p, REG32e|REG16|REG8|RAX|EAX|AX|AL, MEM|MEM_ONLY_DISP); 1300 put(p, MEM32|MEM16|MEM8, IMM); 1301 put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef"); 1302 put("movbe", REG16|REG32e, MEM); 1303 put("movbe", MEM, REG16|REG32e); 1304 #if defined(XBYAK64) && !defined(__ILP32__) 1305 put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]"); 1306 put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL); 1307 put(p, "qword [rax], 0"); 1308 put(p, "qword [rax], 0x12"); 1309 put(p, "qword [rax], 0x1234"); 1310 put(p, "qword [rax], 0x12345678"); 1311 // put(p, "qword [rax], 0x123456789ab"); 1312 put(p, "qword [rax], 1000000"); 1313 put(p, "rdx, qword [rax]"); 1314 #endif 1315 put("mov", EAX, "ptr [eax + ecx * 0]", "[eax + ecx * 0]"); // ignore scale = 0 1316 } 1317 { 1318 const char tbl[][8] = { 1319 "movsx", 1320 "movzx", 1321 }; 1322 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1323 const char *p = tbl[i]; 1324 put(p, REG64, REG16|REG8|MEM8|MEM16); 1325 put(p, REG32, REG16|REG8|MEM8|MEM16); 1326 put(p, REG16, REG8|MEM8); 1327 put(p, "eax, ah"); 1328 } 1329 } 1330 #ifdef XBYAK64 1331 put("movsxd", REG64, REG32|MEM32); 1332 #endif 1333 put("cmpxchg8b", MEM); 1334 #ifdef XBYAK64 1335 put("cmpxchg16b", MEM); 1336 put("fxrstor64", MEM); 1337 put("xbegin", 
"0x12345678"); 1338 #endif 1339 { 1340 const char tbl[][8] = { 1341 "xadd", 1342 "cmpxchg" 1343 }; 1344 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1345 const char *p = tbl[i]; 1346 put(p, REG8|MEM, REG8); 1347 put(p, REG16|MEM, REG16); 1348 put(p, REG32|MEM, REG32); 1349 put(p, REG64|MEM, REG64); 1350 } 1351 } 1352 1353 put("xchg", AL|REG8, AL|REG8|MEM); 1354 put("xchg", MEM, AL|REG8); 1355 put("xchg", AX|REG16, AX|REG16|MEM); 1356 put("xchg", MEM, AX|REG16); 1357 put("xchg", EAX|REG32, EAX|REG32|MEM); 1358 put("xchg", MEM, EAX|REG32); 1359 put("xchg", REG64, REG64|MEM); 1360 put("xabort", IMM8); 1361 } 1362 void putShift() const 1363 { 1364 const char tbl[][8] = { 1365 "rcl", 1366 "rcr", 1367 "rol", 1368 "ror", 1369 "sar", 1370 "shl", 1371 "shr", 1372 1373 "sal", 1374 }; 1375 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1376 const char *p = tbl[i]; 1377 put(p, REG32e|REG16|REG8|MEM32|MEM16|MEM8, ONE|CL|IMM); 1378 } 1379 } 1380 void putShxd() const 1381 { 1382 const char tbl[][8] = { 1383 "shld", 1384 "shrd", 1385 }; 1386 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1387 const char *p = tbl[i]; 1388 put(p, REG64|MEM, REG64, IMM|CL); 1389 put(p, REG32|MEM, REG32, IMM|CL); 1390 put(p, REG16|MEM, REG16, IMM|CL); 1391 } 1392 } 1393 void putBs() const 1394 { 1395 const char tbl[][8] = { 1396 "bsr", 1397 "bsf", 1398 "lzcnt", 1399 "tzcnt", 1400 "popcnt", 1401 }; 1402 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1403 const char *p = tbl[i]; 1404 put(p, REG64, REG64|MEM); 1405 put(p, REG32, REG32|MEM); 1406 put(p, REG16, REG16|MEM); 1407 } 1408 } 1409 void putSSSE3() const 1410 { 1411 const char tbl[][16] = { 1412 "pshufb", 1413 "phaddw", 1414 "phaddd", 1415 "phaddsw", 1416 "pmaddubsw", 1417 "phsubw", 1418 "phsubd", 1419 "phsubsw", 1420 "psignb", 1421 "psignw", 1422 "psignd", 1423 "pmulhrsw", 1424 "pabsb", 1425 "pabsw", 1426 "pabsd", 1427 }; 1428 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1429 const char *p = tbl[i]; 1430 put(p, XMM, XMM|MEM); 1431 
put(p, MMX, MMX|MEM); 1432 } 1433 put("palignr", XMM, XMM|MEM, IMM8); 1434 put("palignr", MMX, MMX|MEM, IMM8); 1435 } 1436 void putSSE4_1() const 1437 { 1438 const char tbl[][16] = { 1439 "blendvpd", 1440 "blendvps", 1441 "packusdw", 1442 "pblendvb", 1443 "pcmpeqq", 1444 "ptest", 1445 "pmovsxbw", 1446 "pmovsxbd", 1447 "pmovsxbq", 1448 "pmovsxwd", 1449 "pmovsxwq", 1450 "pmovsxdq", 1451 "pmovzxbw", 1452 "pmovzxbd", 1453 "pmovzxbq", 1454 "pmovzxwd", 1455 "pmovzxwq", 1456 "pmovzxdq", 1457 "pminsb", 1458 "pminsd", 1459 "pminuw", 1460 "pminud", 1461 "pmaxsb", 1462 "pmaxsd", 1463 "pmaxuw", 1464 "pmaxud", 1465 "pmuldq", 1466 "pmulld", 1467 "phminposuw", 1468 "pcmpgtq", 1469 "aesdec", 1470 "aesdeclast", 1471 "aesenc", 1472 "aesenclast", 1473 "aesimc", 1474 }; 1475 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1476 const char *p = tbl[i]; 1477 put(p, XMM, XMM|MEM); 1478 } 1479 } 1480 void putSSE4_2() const 1481 { 1482 { 1483 const char tbl[][16] = { 1484 "blendpd", 1485 "blendps", 1486 "dppd", 1487 "dpps", 1488 "mpsadbw", 1489 "pblendw", 1490 "roundps", 1491 "roundpd", 1492 "roundss", 1493 "roundsd", 1494 "pcmpestrm", 1495 "pcmpestri", 1496 "pcmpistrm", 1497 "pcmpistri", 1498 "pclmulqdq", 1499 "aeskeygenassist", 1500 }; 1501 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1502 const char *p = tbl[i]; 1503 put(p, XMM, XMM|MEM, IMM); 1504 } 1505 } 1506 put("extractps", REG32e|MEM, XMM, IMM); 1507 put("pextrw", REG32e|MEM, XMM, IMM); // pextrw for REG32 is for MMX2 1508 put("pextrb", REG32e|MEM, XMM, IMM); 1509 put("pextrd", REG32|MEM, XMM, IMM); 1510 1511 put("insertps", XMM, XMM|MEM, IMM); 1512 put("pinsrb", XMM, REG32|MEM, IMM); 1513 put("pinsrd", XMM, REG32|MEM, IMM); 1514 put("movntdqa", XMM, MEM); 1515 put("crc32", REG32, REG8|REG16|REG32|MEM8|MEM16|MEM32); 1516 put("crc32", REG64, REG64|REG8|MEM8); 1517 #ifdef XBYAK64 1518 put("pextrq", REG64|MEM, XMM, IMM); 1519 put("pinsrq", XMM, REG64|MEM, IMM); 1520 #endif 1521 1522 } 1523 void putVpclmulqdq() 1524 { 1525 const 
char tbl[][16] = { 1526 "vpclmullqlqdq", 1527 "vpclmulhqlqdq", 1528 "vpclmullqhqdq", 1529 "vpclmulhqhqdq", 1530 }; 1531 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1532 const char *p = tbl[i] + 1; // remove the top 'v' 1533 put(p, XMM, XMM|MEM); 1534 p = tbl[i]; // use the top 'v' 1535 put(p, XMM, XMM, XMM|MEM); 1536 put(p, YMM, YMM, YMM|MEM); 1537 put(p, ZMM, ZMM, ZMM|MEM); 1538 } 1539 } 1540 void putSHA() const 1541 { 1542 put("sha1rnds4", XMM, XMM|MEM, IMM); 1543 put("sha1nexte", XMM, XMM|MEM); 1544 put("sha1msg1", XMM, XMM|MEM); 1545 put("sha1msg2", XMM, XMM|MEM); 1546 put("sha256rnds2", XMM, XMM|MEM); 1547 put("sha256msg1", XMM, XMM|MEM); 1548 put("sha256msg2", XMM, XMM|MEM); 1549 } 1550 void putMPX() const 1551 { 1552 #ifdef XBYAK64 1553 const uint64_t reg = REG64; 1554 #else 1555 const uint64_t reg = REG32; 1556 #endif 1557 put("bndcl", BNDREG, reg|MEM); 1558 put("bndcu", BNDREG, reg|MEM); 1559 put("bndcn", BNDREG, reg|MEM); 1560 put("bndldx", BNDREG, MEM); 1561 put("bndmk", BNDREG, MEM); 1562 put("bndmov", BNDREG, BNDREG|MEM); 1563 put("bndstx", MEM, BNDREG); 1564 put("bndstx", "ptr [eax]", "[eax]", BNDREG); 1565 put("bndstx", "ptr [eax+5]", "[eax+5]", BNDREG); 1566 put("bndstx", "ptr [eax+500]", "[eax+500]", BNDREG); 1567 put("bndstx", "ptr [eax+ecx]", "[eax+ecx]", BNDREG); 1568 put("bndstx", "ptr [ecx+eax]", "[ecx+eax]", BNDREG); 1569 put("bndstx", "ptr [eax+esp]", "[eax+esp]", BNDREG); 1570 put("bndstx", "ptr [esp+eax]", "[esp+eax]", BNDREG); 1571 put("bndstx", "ptr [eax+ecx*2]", "[eax+ecx*2]", BNDREG); 1572 put("bndstx", "ptr [ecx+ecx]", "[ecx+ecx]", BNDREG); 1573 put("bndstx", "ptr [ecx*2]", "[ecx*2]", BNDREG); 1574 put("bndstx", "ptr [eax+ecx*2+500]", "[eax+ecx*2+500]", BNDREG); 1575 #ifdef XBYAK64 1576 put("bndstx", "ptr [rax+rcx*2]", "[rax+rcx*2]", BNDREG); 1577 put("bndstx", "ptr [r9*2]", "[r9*2]", BNDREG); 1578 put("bndstx", "ptr [r9*2+r15]", "[r9*2+r15]", BNDREG); 1579 #endif 1580 } 1581 void putFpuMem16_32() const 1582 { 1583 const char 
tbl[][8] = { 1584 "fiadd", 1585 "fidiv", 1586 "fidivr", 1587 "ficom", 1588 "ficomp", 1589 "fimul", 1590 "fist", 1591 "fisub", 1592 "fisubr", 1593 }; 1594 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1595 const char *p = tbl[i]; 1596 put(p, MEM16|MEM32); 1597 } 1598 } 1599 void putFpuMem32_64() const 1600 { 1601 const char tbl[][8] = { 1602 "fadd", 1603 "fcom", 1604 "fcomp", 1605 "fdiv", 1606 "fdivr", 1607 "fld", 1608 "fmul", 1609 "fst", 1610 "fstp", 1611 "fsub", 1612 "fsubr", 1613 }; 1614 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1615 const char *p = tbl[i]; 1616 put(p, MEM32|MEM64); 1617 } 1618 } 1619 void putFpuMem16_32_64() const 1620 { 1621 const char tbl[][8] = { 1622 "fild", 1623 "fistp", 1624 "fisttp", 1625 }; 1626 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1627 const char *p = tbl[i]; 1628 put(p, MEM16|MEM32|MEM64); 1629 } 1630 } 1631 void putFpuFpu() const 1632 { 1633 const struct Tbl { 1634 const char *name; 1635 int mode; /* 1:only (st0, sti), 2: only (sti, st0), 3: both */ 1636 } tbl[] = { 1637 { "fadd", 3 }, 1638 { "faddp", 2 }, 1639 { "fcmovb", 1 }, 1640 { "fcmove", 1 }, 1641 { "fcmovbe", 1 }, 1642 { "fcmovu", 1 }, 1643 { "fcmovnb", 1 }, 1644 { "fcmovne", 1 }, 1645 { "fcmovnbe", 1 }, 1646 { "fcmovnu", 1 }, 1647 { "fcomi", 1 }, 1648 { "fcomip", 1 }, 1649 { "fucomi", 1 }, 1650 { "fucomip", 1 }, 1651 { "fdiv", 3 }, 1652 { "fdivp", 2 }, 1653 { "fdivr", 3 }, 1654 { "fdivrp", 2 }, 1655 { "fmul", 3 }, 1656 { "fmulp", 2 }, 1657 { "fsub", 3 }, 1658 { "fsubp", 2 }, 1659 { "fsubr", 3 }, 1660 { "fsubrp", 2 }, 1661 }; 1662 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1663 const Tbl *p = &tbl[i]; 1664 if (p->mode & 1) put(p->name, ST0, STi); 1665 if (p->mode & 2) put(p->name, STi, ST0); 1666 if (p->mode) put(p->name, STi); 1667 } 1668 } 1669 void putFpu() const 1670 { 1671 const char tbl[][16] = { 1672 "fcom", 1673 "fcomp", 1674 "ffree", 1675 "fld", 1676 "fst", 1677 "fstp", 1678 "fucom", 1679 "fucomp", 1680 "fxch", 1681 }; 1682 for (size_t i = 
0; i < NUM_OF_ARRAY(tbl); i++) { 1683 put(tbl[i], STi); 1684 } 1685 } 1686 void putAVX1() 1687 { 1688 const struct Tbl { 1689 const char *name; 1690 bool only_pd_ps; 1691 } tbl[] = { 1692 { "add", false }, 1693 { "sub", false }, 1694 { "mul", false }, 1695 { "div", false }, 1696 { "max", false }, 1697 { "min", false }, 1698 { "and", true }, 1699 { "andn", true }, 1700 { "or", true }, 1701 { "xor", true }, 1702 1703 { "addsub", true }, 1704 { "hadd", true }, 1705 { "hsub", true }, 1706 }; 1707 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1708 const struct Suf { 1709 const char *suf; 1710 bool supportYMM; 1711 } suf[] = { 1712 { "pd", true }, 1713 { "ps", true }, 1714 { "sd", false }, 1715 { "ss", false }, 1716 }; 1717 for (size_t j = 0; j < NUM_OF_ARRAY(suf); j++) { 1718 if (tbl[i].only_pd_ps && j == 2) break; 1719 std::string name = std::string("v") + tbl[i].name + suf[j].suf; 1720 const char *p = name.c_str(); 1721 put(p, XMM, XMM | MEM); 1722 put(p, XMM, XMM, XMM | MEM); 1723 if (!suf[j].supportYMM) continue; 1724 put(p, YMM, YMM | MEM); 1725 put(p, YMM, YMM, YMM | MEM); 1726 } 1727 } 1728 } 1729 void putAVX_X_X_XM_omit() 1730 { 1731 const struct Tbl { 1732 const char *name; 1733 bool supportYMM; 1734 } tbl[] = { 1735 { "vaesenc", false }, 1736 { "vaesenclast", false }, 1737 { "vaesdec", false }, 1738 { "vaesdeclast", false }, 1739 { "vcvtsd2ss", false }, 1740 { "vcvtss2sd", false }, 1741 { "vpacksswb", true }, 1742 { "vpackssdw", true }, 1743 { "vpackuswb", true }, 1744 { "vpackusdw", true }, 1745 1746 { "vpaddb", true }, 1747 { "vpaddw", true }, 1748 { "vpaddd", true }, 1749 { "vpaddq", true }, 1750 1751 { "vpaddsb", true }, 1752 { "vpaddsw", true }, 1753 1754 { "vpaddusb", true }, 1755 { "vpaddusw", true }, 1756 1757 { "vpand", true }, 1758 { "vpandn", true }, 1759 { "vpavgb", true }, 1760 { "vpavgw", true }, 1761 1762 { "vpcmpeqb", true }, 1763 { "vpcmpeqw", true }, 1764 { "vpcmpeqd", true }, 1765 { "vpcmpeqq", true }, 1766 1767 { "vpcmpgtb", true }, 1768 
{ "vpcmpgtw", true }, 1769 { "vpcmpgtd", true }, 1770 { "vpcmpgtq", true }, 1771 1772 { "vphaddw", true }, 1773 { "vphaddd", true }, 1774 { "vphaddsw", true }, 1775 1776 { "vphsubw", true }, 1777 { "vphsubd", true }, 1778 { "vphsubsw", true }, 1779 { "vpmaddwd", true }, 1780 { "vpmaddubsw", true }, 1781 1782 { "vpmaxsb", true }, 1783 { "vpmaxsw", true }, 1784 { "vpmaxsd", true }, 1785 1786 { "vpmaxub", true }, 1787 { "vpmaxuw", true }, 1788 { "vpmaxud", true }, 1789 1790 { "vpminsb", true }, 1791 { "vpminsw", true }, 1792 { "vpminsd", true }, 1793 1794 { "vpminub", true }, 1795 { "vpminuw", true }, 1796 { "vpminud", true }, 1797 1798 { "vpmulhuw", true }, 1799 { "vpmulhrsw", true }, 1800 { "vpmulhw", true }, 1801 { "vpmullw", true }, 1802 { "vpmulld", true }, 1803 1804 { "vpmuludq", true }, 1805 { "vpmuldq", true }, 1806 1807 { "vpor", true }, 1808 { "vpsadbw", true }, 1809 1810 { "vpsignb", true }, 1811 { "vpsignw", true }, 1812 { "vpsignd", true }, 1813 1814 { "vpsllw", false }, 1815 { "vpslld", false }, 1816 { "vpsllq", false }, 1817 1818 { "vpsraw", false }, 1819 { "vpsrad", false }, 1820 { "vpsrlw", false }, 1821 { "vpsrld", false }, 1822 { "vpsrlq", false }, 1823 1824 { "vpsubb", true }, 1825 { "vpsubw", true }, 1826 { "vpsubd", true }, 1827 { "vpsubq", true }, 1828 1829 { "vpsubsb", true }, 1830 { "vpsubsw", true }, 1831 1832 { "vpsubusb", true }, 1833 { "vpsubusw", true }, 1834 1835 { "vpunpckhbw", true }, 1836 { "vpunpckhwd", true }, 1837 { "vpunpckhdq", true }, 1838 { "vpunpckhqdq", true }, 1839 1840 { "vpunpcklbw", true }, 1841 { "vpunpcklwd", true }, 1842 { "vpunpckldq", true }, 1843 { "vpunpcklqdq", true }, 1844 1845 { "vpxor", true }, 1846 { "vsqrtsd", false }, 1847 { "vsqrtss", false }, 1848 1849 { "vunpckhpd", true }, 1850 { "vunpckhps", true }, 1851 { "vunpcklpd", true }, 1852 { "vunpcklps", true }, 1853 }; 1854 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1855 const Tbl *p = &tbl[i]; 1856 put(p->name, XMM, XMM | MEM); 1857 put(p->name, XMM, 
XMM, XMM | MEM); 1858 if (!p->supportYMM) continue; 1859 put(p->name, YMM, YMM | MEM); 1860 put(p->name, YMM, YMM, YMM | MEM); 1861 } 1862 } 1863 void putAVX_X_X_XM_IMM() 1864 { 1865 const struct Tbl { 1866 const char *name; 1867 bool supportYMM; 1868 } tbl[] = { 1869 { "vblendpd", true }, 1870 { "vblendps", true }, 1871 { "vdppd", false }, 1872 { "vdpps", true }, 1873 { "vmpsadbw", true }, 1874 { "vpblendw", true }, 1875 { "vpblendd", true }, 1876 { "vroundsd", false }, 1877 { "vroundss", false }, 1878 { "vpclmulqdq", false }, 1879 { "vcmppd", true }, 1880 { "vcmpps", true }, 1881 { "vcmpsd", false }, 1882 { "vcmpss", false }, 1883 { "vinsertps", false }, 1884 { "vpalignr", true }, 1885 { "vshufpd", true }, 1886 { "vshufps", true }, 1887 }; 1888 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1889 const Tbl *p = &tbl[i]; 1890 put(p->name, XMM, XMM, XMM | MEM, IMM); 1891 put(p->name, XMM, XMM | MEM, IMM); 1892 if (!p->supportYMM) continue; 1893 put(p->name, YMM, YMM, YMM | MEM, IMM); 1894 put(p->name, YMM, YMM | MEM, IMM); 1895 } 1896 } 1897 void putAVX_X_XM_IMM() 1898 { 1899 const struct Tbl { 1900 const char *name; 1901 bool supportYMM; 1902 } tbl[] = { 1903 { "vroundpd", true }, 1904 { "vroundps", true }, 1905 { "vpcmpestri", false }, 1906 { "vpcmpestrm", false }, 1907 { "vpcmpistri", false }, 1908 { "vpcmpistrm", false }, 1909 { "vpermilpd", true }, 1910 { "vpermilps", true }, 1911 { "vaeskeygenassist", false }, 1912 { "vpshufd", true }, 1913 { "vpshufhw", true }, 1914 { "vpshuflw", true }, 1915 }; 1916 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1917 const Tbl *p = &tbl[i]; 1918 put(p->name, XMM, XMM | MEM, IMM); 1919 if (!p->supportYMM) continue; 1920 put(p->name, YMM, YMM | MEM, IMM); 1921 } 1922 } 1923 void putAVX_X_X_XM() 1924 { 1925 const struct Tbl { 1926 const char *name; 1927 bool supportYMM; 1928 } tbl[] = { 1929 { "vpermilpd", true }, 1930 { "vpermilps", true }, 1931 { "vpshufb", true }, 1932 1933 { "vpsllvd", true }, 1934 { "vpsllvq", true }, 
1935 { "vpsravd", true }, 1936 { "vpsrlvd", true }, 1937 { "vpsrlvq", true }, 1938 }; 1939 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 1940 const Tbl *p = &tbl[i]; 1941 put(p->name, XMM, XMM, XMM | MEM); 1942 if (!p->supportYMM) continue; 1943 put(p->name, YMM, YMM, YMM | MEM); 1944 } 1945 } 1946 void putAVX_X_XM() 1947 { 1948 const struct Tbl { 1949 const char *name; 1950 bool supportYMM; 1951 } tbl[] = { 1952 { "vaesimc", false }, 1953 { "vtestps", true }, 1954 { "vtestpd", true }, 1955 { "vcomisd", false }, 1956 { "vcomiss", false }, 1957 { "vcvtdq2ps", true }, 1958 { "vcvtps2dq", true }, 1959 { "vcvttps2dq", true }, 1960 { "vmovapd", true }, 1961 { "vmovaps", true }, 1962 { "vmovddup", true }, 1963 { "vmovdqa", true }, 1964 { "vmovdqu", true }, 1965 { "vmovupd", true }, 1966 { "vmovups", true }, 1967 1968 { "vpabsb", true }, 1969 { "vpabsw", true }, 1970 { "vpabsd", true }, 1971 { "vphminposuw", false }, 1972 1973 { "vpmovsxbw", false }, 1974 { "vpmovsxbd", false }, 1975 { "vpmovsxbq", false }, 1976 { "vpmovsxwd", false }, 1977 { "vpmovsxwq", false }, 1978 { "vpmovsxdq", false }, 1979 1980 { "vpmovzxbw", false }, 1981 { "vpmovzxbd", false }, 1982 { "vpmovzxbq", false }, 1983 { "vpmovzxwd", false }, 1984 { "vpmovzxwq", false }, 1985 { "vpmovzxdq", false }, 1986 1987 { "vptest", true }, 1988 { "vrcpps", true }, 1989 { "vrcpss", false }, 1990 1991 { "vrsqrtps", true }, 1992 { "vrsqrtss", false }, 1993 1994 { "vsqrtpd", true }, 1995 { "vsqrtps", true }, 1996 { "vucomisd", false }, 1997 { "vucomiss", false }, 1998 }; 1999 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2000 const Tbl *p = &tbl[i]; 2001 put(p->name, XMM, XMM | MEM); 2002 if (!p->supportYMM) continue; 2003 put(p->name, YMM, YMM | MEM); 2004 } 2005 } 2006 void putAVX_Y_XM() 2007 { 2008 const char *tbl[] = { 2009 "vpmovsxbw", 2010 "vpmovsxbd", 2011 "vpmovsxbq", 2012 "vpmovsxwd", 2013 "vpmovsxwq", 2014 "vpmovsxdq", 2015 "vpmovzxbw", 2016 "vpmovzxbd", 2017 "vpmovzxbq", 2018 "vpmovzxwd", 2019 
"vpmovzxwq", 2020 "vpmovzxdq", 2021 }; 2022 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2023 const char *name = tbl[i]; 2024 put(name, YMM, XMM); 2025 } 2026 } 2027 void putAVX_M_X() 2028 { 2029 const struct Tbl { 2030 const char *name; 2031 bool supportYMM; 2032 } tbl[] = { 2033 { "vmovapd", true }, 2034 { "vmovaps", true }, 2035 { "vmovdqa", true }, 2036 { "vmovdqu", true }, 2037 { "vmovupd", true }, 2038 { "vmovups", true }, 2039 }; 2040 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2041 const Tbl *p = &tbl[i]; 2042 put(p->name, MEM, XMM); 2043 if (!p->supportYMM) continue; 2044 put(p->name, MEM, YMM); 2045 } 2046 } 2047 void putAVX_X_X_IMM_omit() 2048 { 2049 const struct Tbl { 2050 const char *name; 2051 bool support_Y_Y_X; 2052 } tbl[] = { 2053 { "vpslldq", false }, 2054 { "vpsrldq", false }, 2055 { "vpsllw", true }, 2056 { "vpslld", true }, 2057 { "vpsllq", true }, 2058 { "vpsraw", true }, 2059 { "vpsrad", true }, 2060 { "vpsrlw", true }, 2061 { "vpsrld", true }, 2062 { "vpsrlq", true }, 2063 }; 2064 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2065 const Tbl& p = tbl[i]; 2066 put(p.name, XMM, XMM, IMM); 2067 put(p.name, YMM, YMM, IMM); 2068 put(p.name, YMM, IMM); 2069 put(p.name, _ZMM, _ZMM, IMM8); 2070 #ifdef XBYAK64 2071 put(p.name, _XMM3, _XMM3, IMM8); 2072 put(p.name, _YMM3, _YMM3, IMM8); 2073 #endif 2074 if (p.support_Y_Y_X) { 2075 put(p.name, YMM, YMM, XMM); 2076 } 2077 } 2078 } 2079 void putFMA() 2080 { 2081 const struct Tbl { 2082 const char *name; 2083 bool supportYMM; 2084 } tbl[] = { 2085 { "vfmadd", true }, 2086 { "vfmadd", false }, 2087 { "vfmaddsub", true }, 2088 { "vfmsubadd", true }, 2089 { "vfmsub", true }, 2090 { "vfmsub", false }, 2091 { "vfnmadd", true }, 2092 { "vfnmadd", false }, 2093 { "vfnmsub", true }, 2094 { "vfnmsub", false }, 2095 }; 2096 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2097 const Tbl& p = tbl[i]; 2098 const struct Ord { 2099 const char *name; 2100 } ord[] = { 2101 { "132" }, 2102 { "213" }, 2103 { 
"231" }, 2104 }; 2105 for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) { 2106 const char sufTbl[][2][8] = { 2107 { "pd", "ps" }, 2108 { "sd", "ss" }, 2109 }; 2110 for (size_t k = 0; k < 2; k++) { 2111 const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k]; 2112 std::string name = std::string(p.name) + ord[j].name + suf; 2113 const char *q = name.c_str(); 2114 put(q, XMM, XMM, XMM | MEM); 2115 if (!p.supportYMM) continue; 2116 put(q, YMM, YMM, YMM | MEM); 2117 } 2118 } 2119 } 2120 } 2121 void putAVX2() 2122 { 2123 put("vextractps", REG32 | MEM, XMM, IMM); 2124 put("vldmxcsr", MEM); 2125 put("vstmxcsr", MEM); 2126 put("vmaskmovdqu", XMM, XMM); 2127 2128 put("vmovd", XMM, REG32 | MEM); 2129 put("vmovd", REG32 | MEM, XMM); 2130 2131 put("vmovq", XMM, XMM | MEM); 2132 put("vmovq", MEM, XMM); 2133 2134 put("vmovhlps", XMM, XMM); 2135 put("vmovhlps", XMM, XMM, XMM); 2136 put("vmovlhps", XMM, XMM); 2137 put("vmovlhps", XMM, XMM, XMM); 2138 2139 { 2140 const char tbl[][16] = { 2141 "vmovhpd", 2142 "vmovhps", 2143 "vmovlpd", 2144 "vmovlps", 2145 }; 2146 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2147 put(tbl[i], XMM, XMM, MEM); 2148 put(tbl[i], XMM, MEM); 2149 put(tbl[i], MEM, XMM); 2150 } 2151 } 2152 put("vmovmskpd", REG32e, XMM | YMM); 2153 put("vmovmskps", REG32e, XMM | YMM); 2154 2155 put("vmovntdq", MEM, XMM | YMM); 2156 put("vmovntpd", MEM, XMM | YMM); 2157 put("vmovntdqa", XMM | YMM, MEM); 2158 2159 { 2160 const char tbl[][8] = { "vmovsd", "vmovss" }; 2161 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2162 put(tbl[i], XMM, XMM, XMM); 2163 put(tbl[i], XMM, XMM | MEM); 2164 put(tbl[i], MEM, XMM); 2165 } 2166 } 2167 put("vpextrb", REG32e|MEM, XMM, IMM); 2168 put("vpextrd", REG32|MEM, XMM, IMM); 2169 2170 for (int i = 0; i < 3; i++) { 2171 const char tbl[][8] = { "vpinsrb", "vpinsrw", "vpinsrd" }; 2172 put(tbl[i], XMM, XMM, REG32|MEM, IMM); 2173 put(tbl[i], XMM, REG32|MEM, IMM); 2174 } 2175 2176 put("vpmovmskb", REG32e, XMM|YMM); 2177 2178 { 2179 const struct Tbl 
{ 2180 const char *name; 2181 bool supportYMM; 2182 } tbl[] = { 2183 { "vblendvpd", true }, 2184 { "vblendvps", true }, 2185 { "vpblendvb", true }, 2186 }; 2187 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2188 const Tbl& p = tbl[i]; 2189 put(p.name, XMM, XMM, XMM | MEM, XMM); 2190 put(p.name, XMM, XMM | MEM, XMM); 2191 if (!p.supportYMM) continue; 2192 put(p.name, YMM, YMM, YMM | MEM, YMM); 2193 put(p.name, YMM, YMM | MEM, YMM); 2194 } 2195 } 2196 // cvt 2197 { 2198 put("vcvtss2si", REG32e, XMM | MEM); 2199 put("vcvttss2si", REG32e, XMM | MEM); 2200 put("vcvtsd2si", REG32e, XMM | MEM); 2201 put("vcvttsd2si", REG32e, XMM | MEM); 2202 2203 put("vcvtsi2ss", XMM, XMM, REG32e | MEM); 2204 put("vcvtsi2ss", XMM, REG32e | MEM); 2205 2206 put("vcvtsi2sd", XMM, XMM, REG32e | MEM); 2207 put("vcvtsi2sd", XMM, REG32e | MEM); 2208 #ifdef XBYAK64 2209 put("vcvtsi2sd", XMM, XMM, MEM64); 2210 put("vcvtsi2sd", XMM, MEM64); 2211 #endif 2212 2213 put("vcvtps2pd", XMM | YMM, XMM | MEM); 2214 put("vcvtdq2pd", XMM | YMM, XMM | MEM); 2215 2216 put("vcvtpd2ps", XMM, XMM | YMM | MEM); 2217 put("vcvtpd2dq", XMM, XMM | YMM | MEM); 2218 put("vcvttpd2dq", XMM, XMM | YMM | MEM); 2219 2220 put("vcvtph2ps", XMM | YMM, XMM | MEM); 2221 put("vcvtps2ph", XMM | MEM, XMM | YMM, IMM8); 2222 } 2223 #ifdef XBYAK64 2224 put("vmovq", XMM, REG64); 2225 put("vmovq", REG64, XMM); 2226 2227 put("vpextrq", REG64|MEM, XMM, IMM); 2228 2229 put("vpinsrq", XMM, XMM, REG64|MEM, IMM); 2230 put("vpinsrq", XMM, REG64|MEM, IMM); 2231 2232 #endif 2233 } 2234 void putFMA2() 2235 { 2236 #ifdef USE_YASM 2237 put("vextractf128", XMM | MEM, YMM, IMM); 2238 put("vextracti128", XMM | MEM, YMM, IMM); 2239 put("vmaskmovps", MEM, YMM, YMM); 2240 put("vmaskmovpd", MEM, YMM, YMM); 2241 put("vlddqu", XMM | YMM, MEM); 2242 2243 put("vmovshdup", XMM, XMM | MEM); 2244 put("vmovshdup", YMM, YMM | MEM); 2245 put("vmovsldup", XMM, XMM | MEM); 2246 put("vmovsldup", YMM, YMM | MEM); 2247 2248 // QQQ:nasm is wrong 2249 put("vpcmpeqq", 
XMM, XMM | MEM); 2250 put("vpcmpeqq", XMM, XMM, XMM | MEM); 2251 put("vpcmpgtq", XMM, XMM | MEM); 2252 put("vpcmpgtq", XMM, XMM, XMM | MEM); 2253 2254 put("vmovntps", MEM, XMM | YMM); // nasm error 2255 #else 2256 put("vmaskmovps", XMM, XMM, MEM); 2257 put("vmaskmovps", YMM, YMM, MEM); 2258 2259 put("vmaskmovpd", YMM, YMM, MEM); 2260 put("vmaskmovpd", XMM, XMM, MEM); 2261 2262 put("vmaskmovps", MEM, XMM, XMM); 2263 put("vmaskmovpd", MEM, XMM, XMM); 2264 #endif 2265 } 2266 void putCmp() 2267 { 2268 const char pred[32][16] = { 2269 "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", 2270 "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", 2271 "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", 2272 "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" 2273 }; 2274 const char suf[][4] = { "pd", "ps", "sd", "ss" }; 2275 for (int i = 0; i < 4; i++) { 2276 for (int j = 0; j < 32; j++) { 2277 if (j < 8) { 2278 put((std::string("cmp") + pred[j] + suf[i]).c_str(), XMM, XMM | MEM); 2279 } 2280 std::string str = std::string("vcmp") + pred[j] + suf[i]; 2281 const char *p = str.c_str(); 2282 put(p, XMM, XMM | MEM); 2283 put(p, XMM, XMM, XMM | MEM); 2284 if (i >= 2) continue; 2285 put(p, YMM, YMM | MEM); 2286 put(p, YMM, YMM, YMM | MEM); 2287 } 2288 } 2289 } 2290 void putRip() 2291 { 2292 const char tbl[][2][64] = { 2293 { "mov(byte [rip - 10], 3);dump();", "mov byte [rip - 10], 3" }, 2294 { "mov(word [rip - 10], 3);dump();", "mov word [rip - 10], 3" }, 2295 { "mov(dword[rip - 10], 3);dump();", "mov dword [rip - 10], 3" }, 2296 { "mov(qword [rip - 10], 3);dump();", "mov qword [rip - 10], 3" }, 2297 { "mov(ptr [rip - 10], al);dump();", "mov byte [rip - 10], al" }, 2298 { "mov(ptr [rip - 10], ax);dump();", "mov word [rip - 10], ax" }, 2299 { "mov(ptr [rip - 10], eax);dump();", "mov dword [rip - 10], eax" }, 2300 { "mov(ptr [rip - 10], rax);dump();", "mov qword [rip - 10], rax" }, 2301 }; 2302 for (size_t i = 0; i < 
NUM_OF_ARRAY(tbl); i++) { 2303 puts(tbl[i][isXbyak_ ? 0 : 1]); 2304 } 2305 } 2306 public: 2307 Test(bool isXbyak) 2308 : isXbyak_(isXbyak) 2309 , funcNum_(1) 2310 { 2311 if (!isXbyak_) return; 2312 printf("%s", 2313 " void gen0()\n" 2314 " {\n"); 2315 } 2316 /* 2317 gcc and vc give up to compile this source, 2318 so I split functions. 2319 */ 2320 void separateFunc() 2321 { 2322 if (!isXbyak_) return; 2323 printf( 2324 " }\n" 2325 " void gen%d()\n" 2326 " {\n", funcNum_++); 2327 } 2328 ~Test() 2329 { 2330 if (!isXbyak_) return; 2331 printf("%s", 2332 " }\n" 2333 " void gen()\n" 2334 " {\n"); 2335 for (int i = 0; i < funcNum_; i++) { 2336 printf( 2337 " gen%d();\n", i); 2338 } 2339 printf( 2340 " }\n"); 2341 } 2342 void putGprR_R_RM() 2343 { 2344 const char *tbl[] = { 2345 "andn", 2346 "mulx", 2347 "pdep", 2348 "pext", 2349 }; 2350 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2351 const char *name = tbl[i]; 2352 put(name, REG32, REG32, REG32 | MEM); 2353 #ifdef XBYAK64 2354 put(name, REG64, REG64, REG64 | MEM); 2355 #endif 2356 } 2357 } 2358 void putGprR_RM_R() 2359 { 2360 const char *tbl[] = { 2361 "bextr", 2362 "bzhi", 2363 "sarx", 2364 "shlx", 2365 "shrx", 2366 }; 2367 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2368 const char *name = tbl[i]; 2369 put(name, REG32, REG32 | MEM, REG32); 2370 #ifdef XBYAK64 2371 put(name, REG64, REG64 | MEM, REG64); 2372 #endif 2373 } 2374 } 2375 void putGprR_RM() 2376 { 2377 const char *tbl[] = { 2378 "blsi", 2379 "blsmsk", 2380 "blsr", 2381 }; 2382 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2383 const char *name = tbl[i]; 2384 put(name, REG32, REG32 | MEM); 2385 #ifdef XBYAK64 2386 put(name, REG64, REG64 | MEM); 2387 #endif 2388 } 2389 } 2390 void putGprOtherwise() 2391 { 2392 put("rdrand", REG16 | REG32e); 2393 put("rdseed", REG16 | REG32e); 2394 put("rorx", REG32, REG32 | MEM, IMM8); 2395 #ifdef XBYAK64 2396 put("rorx", REG64, REG64 | MEM, IMM8); 2397 #endif 2398 } 2399 void putGather() 2400 { 2401 const int 
y_vx_y = 0; 2402 const int y_vy_y = 1; 2403 const int x_vy_x = 2; 2404 const struct Tbl { 2405 const char *name; 2406 int mode; 2407 } tbl[] = { 2408 { "vgatherdpd", y_vx_y }, 2409 { "vgatherqpd", y_vy_y }, 2410 { "vgatherdps", y_vy_y }, 2411 { "vgatherqps", x_vy_x }, 2412 { "vpgatherdd", y_vy_y }, 2413 { "vpgatherqd", x_vy_x }, 2414 { "vpgatherdq", y_vx_y }, 2415 { "vpgatherqq", y_vy_y }, 2416 }; 2417 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2418 const Tbl& p = tbl[i]; 2419 const char *name = p.name; 2420 put(name, "xmm3", VM32X, "xmm5"); 2421 switch (p.mode) { 2422 case y_vx_y: 2423 put(name, "ymm3", VM32X, "ymm5"); 2424 break; 2425 case y_vy_y: 2426 put(name, "ymm3", VM32Y, "ymm5"); 2427 break; 2428 case x_vy_x: 2429 put(name, "xmm3", VM32Y, "xmm5"); 2430 break; 2431 default: 2432 printf("ERR mode=%d\n", p.mode); 2433 exit(1); 2434 } 2435 } 2436 } 2437 void putGath(const std::string& vsib) 2438 { 2439 std::string x = "xmm1, "; 2440 std::string a = std::string("[") + vsib + "], xmm3"; 2441 put("vgatherdpd", (x + "ptr" + a).c_str(), (x + a).c_str()); 2442 } 2443 2444 void putGatherAll() 2445 { 2446 const char *xmmTbl[] = { 2447 "xmm2", 2448 "xmm4", 2449 "xmm2*1", 2450 "xmm2*4", 2451 }; 2452 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) { 2453 std::string s = xmmTbl[i]; 2454 putGath(s); 2455 putGath(s + "+3"); 2456 putGath(s + "+eax"); 2457 putGath("3+" + s); 2458 putGath("eax+" + s); 2459 } 2460 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) { 2461 int ord[] = { 0, 1, 2 }; 2462 do { 2463 std::string s; 2464 for (int j = 0; j < 3; j++) { 2465 if (j > 0) s += '+'; 2466 switch (ord[j]) { 2467 case 0: s += xmmTbl[i]; break; 2468 case 1: s += "123"; break; 2469 case 2: s += "ebp"; break; 2470 } 2471 } 2472 putGath(s); 2473 } while (std::next_permutation(ord, ord + 3)); 2474 } 2475 } 2476 void putSeg() 2477 { 2478 { 2479 const char *segTbl[] = { 2480 "es", 2481 "cs", 2482 "ss", 2483 "ds", 2484 "fs", 2485 "gs", 2486 }; 2487 for (size_t i = 0; i < 
NUM_OF_ARRAY(segTbl); i++) { 2488 const char *seg = segTbl[i]; 2489 const char *op1Tbl[] = { 2490 "ax", 2491 "edx", 2492 (isXbyak_ ? "ptr [eax]" : "[eax]"), 2493 #ifdef XBYAK64 2494 "r9", 2495 #endif 2496 }; 2497 for (size_t j = 0; j < NUM_OF_ARRAY(op1Tbl); j++) { 2498 const char *op1 = op1Tbl[j]; 2499 if (isXbyak_) { 2500 printf("mov(%s, %s); dump();\n", op1, seg); 2501 printf("mov(%s, %s); dump();\n", seg, op1); 2502 } else { 2503 printf("mov %s, %s\n", op1, seg); 2504 printf("mov %s, %s\n", seg, op1); 2505 } 2506 } 2507 } 2508 } 2509 { 2510 const char *segTbl[] = { 2511 #ifdef XBYAK32 2512 "es", 2513 "ss", 2514 "ds", 2515 #endif 2516 "fs", 2517 "gs", 2518 }; 2519 for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) { 2520 const char *seg = segTbl[i]; 2521 if (isXbyak_) { 2522 printf("push(%s); dump();\n", seg); 2523 printf("pop(%s); dump();\n", seg); 2524 } else { 2525 printf("push %s\n", seg); 2526 printf("pop %s\n", seg); 2527 } 2528 } 2529 } 2530 } 2531 void put() 2532 { 2533 #ifdef USE_AVX512 2534 putAVX512(); 2535 #else 2536 2537 #ifdef USE_AVX 2538 2539 separateFunc(); 2540 putFMA2(); 2541 2542 #ifdef USE_YASM 2543 putGprR_R_RM(); 2544 putGprR_RM_R(); 2545 putGprR_RM(); 2546 putGprOtherwise(); 2547 putGather(); 2548 putGatherAll(); 2549 #else 2550 putAVX1(); 2551 separateFunc(); 2552 putAVX2(); 2553 putAVX_X_X_XM_omit(); 2554 separateFunc(); 2555 putAVX_X_X_XM_IMM(); 2556 separateFunc(); 2557 putAVX_X_XM_IMM(); 2558 separateFunc(); 2559 putAVX_X_X_XM(); 2560 separateFunc(); 2561 putAVX_X_XM(); 2562 separateFunc(); 2563 putAVX_M_X(); 2564 putAVX_X_X_IMM_omit(); 2565 separateFunc(); 2566 putAVX_Y_XM(); 2567 separateFunc(); 2568 putFMA(); 2569 putSHA(); 2570 #endif 2571 2572 #else // USE_AVX 2573 2574 putJmp(); 2575 putFarJmp(); 2576 2577 #ifdef USE_YASM 2578 2579 putSSSE3(); 2580 putSSE4_1(); 2581 separateFunc(); 2582 putSSE4_2(); 2583 putSeg(); // same behavior as yasm for mov rax, cx 2584 putPushPop8_16(); 2585 #else 2586 putSIMPLE(); 2587 putVpclmulqdq(); 
2588 putReg1(); 2589 putBt(); 2590 putRorM(); 2591 separateFunc(); 2592 putPushPop(); 2593 putTest(); 2594 separateFunc(); 2595 putLoadSeg(); 2596 putEtc(); 2597 putShift(); 2598 putShxd(); 2599 2600 separateFunc(); 2601 2602 putBs(); 2603 putMMX1(); 2604 putMMX2(); 2605 separateFunc(); 2606 putMMX3(); 2607 putMMX4(); 2608 putMMX5(); 2609 separateFunc(); 2610 putXMM1(); 2611 putXMM2(); 2612 putXMM3(); 2613 putXMM4(); 2614 separateFunc(); 2615 putCmov(); 2616 putFpuMem16_32(); 2617 putFpuMem32_64(); 2618 separateFunc(); 2619 putFpuMem16_32_64(); 2620 putFpu(); 2621 putFpuFpu(); 2622 putCmp(); 2623 putMPX(); 2624 #endif 2625 2626 #if defined(XBYAK64) && !defined(__ILP32__) 2627 2628 #ifdef USE_YASM 2629 putRip(); 2630 #else 2631 putMov64(); 2632 putMovImm64(); 2633 #endif 2634 2635 #endif // XBYAK64 2636 2637 #endif // USE_AVX 2638 2639 #endif // USE_AVX512 2640 } 2641 #ifdef USE_AVX512 2642 void putOpmask() 2643 { 2644 { 2645 const char *tbl[] = { 2646 "kadd", 2647 "kand", 2648 "kandn", 2649 "kor", 2650 "kxnor", 2651 "kxor", 2652 }; 2653 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2654 std::string name = tbl[i]; 2655 put(name + "b", K, K, K); 2656 put(name + "w", K, K, K); 2657 put(name + "q", K, K, K); 2658 put(name + "d", K, K, K); 2659 } 2660 put("kunpckbw", K, K, K); 2661 put("kunpckwd", K, K, K); 2662 put("kunpckdq", K, K, K); 2663 } 2664 { 2665 const char *tbl[] = { 2666 "knot", 2667 "kortest", 2668 "ktest", 2669 }; 2670 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2671 std::string name = tbl[i]; 2672 put(name + "b", K, K); 2673 put(name + "w", K, K); 2674 put(name + "q", K, K); 2675 put(name + "d", K, K); 2676 } 2677 } 2678 { 2679 const char *tbl[] = { 2680 "kshiftl", 2681 "kshiftr", 2682 }; 2683 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2684 std::string name = tbl[i]; 2685 put(name + "b", K, K, IMM8); 2686 put(name + "w", K, K, IMM8); 2687 put(name + "q", K, K, IMM8); 2688 put(name + "d", K, K, IMM8); 2689 } 2690 } 2691 put("kmovw", K, K | 
MEM | REG32); 2692 put("kmovq", K, K | MEM); 2693 put("kmovb", K, K | MEM | REG32); 2694 put("kmovd", K, K | MEM | REG32); 2695 2696 put("kmovw", MEM | REG32, K); 2697 put("kmovq", MEM, K); 2698 put("kmovb", MEM | REG32, K); 2699 put("kmovd", MEM | REG32, K); 2700 #ifdef XBYAK64 2701 put("kmovq", K, REG64); 2702 put("kmovq", REG64, K); 2703 #endif 2704 } 2705 void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false, int sae = 0) 2706 { 2707 std::string modifier; 2708 char pk[16] = ""; 2709 const char *pz = ""; 2710 const char *saeTblXbyak[] = { "", "|T_rn_sae", "|T_rd_sae", "|T_ru_sae", "|T_rz_sae" }; 2711 const char *saeTblNASM[] = { "", ",{rn-sae}", ",{rd-sae}", ",{ru-sae}", ",{rz-sae}" }; 2712 if (isXbyak_) { 2713 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx); 2714 if (z) pz = "|T_z"; 2715 printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]); 2716 } else { 2717 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx); 2718 if (z && kIdx) pz = "{z}"; 2719 printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]); 2720 } 2721 } 2722 void putCombi() 2723 { 2724 const char *xTbl[] = { 2725 "xmm2", 2726 #ifdef XBYAK64 2727 "xmm8", "xmm31" 2728 #else 2729 "xmm5", "xmm6" 2730 #endif 2731 }; 2732 const char *yTbl[] = { 2733 "ymm0", 2734 #ifdef XBYAK64 2735 "ymm15", "ymm31" 2736 #else 2737 "ymm4", "ymm2" 2738 #endif 2739 }; 2740 const char *zTbl[] = { 2741 "zmm1", 2742 #ifdef XBYAK64 2743 "zmm9", "zmm30" 2744 #else 2745 "zmm3", "zmm7" 2746 #endif 2747 }; 2748 const size_t N = NUM_OF_ARRAY(zTbl); 2749 for (size_t i = 0; i < N; i++) { 2750 for (size_t j = 0; j < N; j++) { 2751 separateFunc(); 2752 for (size_t k = 0; k < N; k++) { 2753 #ifdef XBYAK64 2754 for (int kIdx = 0; kIdx < 8; kIdx++) { 2755 put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx); 2756 put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx); 2757 for (int z = 0; z < 2; z++) { 2758 for (int sae = 0; sae < 5; sae++) { 2759 
put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae); 2760 } 2761 } 2762 } 2763 #else 2764 put_vaddpd(xTbl[i], xTbl[j], xTbl[k]); 2765 put_vaddpd(yTbl[i], yTbl[j], yTbl[k]); 2766 for (int sae = 0; sae < 5; sae++) { 2767 put_vaddpd(zTbl[i], zTbl[j], zTbl[k], sae); 2768 } 2769 #endif 2770 } 2771 } 2772 } 2773 put("vaddpd", XMM, XMM, _MEM); 2774 put("vaddpd", YMM, YMM, _MEM); 2775 put("vaddpd", ZMM, ZMM, _MEM); 2776 } 2777 void putCmpK() 2778 { 2779 { 2780 const struct Tbl { 2781 const char *name; 2782 bool supportYMM; 2783 } tbl[] = { 2784 { "vcmppd", true }, 2785 { "vcmpps", true }, 2786 { "vcmpsd", false }, 2787 { "vcmpss", false }, 2788 }; 2789 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2790 const Tbl *p = &tbl[i]; 2791 put(p->name, K, _XMM, _XMM | MEM, IMM); 2792 if (!p->supportYMM) continue; 2793 put(p->name, K, _YMM, _YMM | MEM, IMM); 2794 put(p->name, K, _ZMM, _ZMM | MEM, IMM); 2795 } 2796 } 2797 put("vcmppd", K2, ZMM, ZMM_SAE, IMM); 2798 #ifdef XBYAK64 2799 { 2800 const struct Tbl { 2801 const char *name; 2802 } tbl[] = { 2803 { "vcomisd" }, 2804 { "vcomiss" }, 2805 { "vucomisd" }, 2806 { "vucomiss" }, 2807 }; 2808 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2809 const Tbl *p = &tbl[i]; 2810 put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM); 2811 } 2812 } 2813 put("vcomiss", _XMM3, XMM | MEM); 2814 put("vcomiss", XMM, XMM_SAE); 2815 #endif 2816 } 2817 void putBroadcastSub(int idx, int disp) 2818 { 2819 #ifdef XBYAK64 2820 const char *a = "rax"; 2821 #else 2822 const char *a = "eax"; 2823 #endif 2824 if (isXbyak_) { 2825 printf("vaddpd(zmm%d, zmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp); 2826 printf("vaddpd(ymm%d, ymm1, ptr_b[%s+%d]);dump();\n", idx, a, disp); 2827 printf("vaddpd(xmm%d, xmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp); 2828 } else { 2829 printf("vaddpd zmm%d, zmm1, [%s+%d]{1to8}\n", idx, a, disp); 2830 printf("vaddpd ymm%d, ymm1, [%s+%d]{1to4}\n", idx, a, disp); 2831 printf("vaddpd xmm%d, xmm1, [%s+%d]{1to2}\n", idx, a, disp); 2832 } 
2833 } 2834 void putBroadcast() 2835 { 2836 for (int i = 0; i < 9; i++) { 2837 putBroadcastSub(0, i); 2838 #ifdef XBYAK64 2839 putBroadcastSub(10, i); 2840 putBroadcastSub(20, i); 2841 #endif 2842 } 2843 put("vpbroadcastb", XMM_KZ | ZMM_KZ, REG8); 2844 put("vpbroadcastw", XMM_KZ | ZMM_KZ, REG16); 2845 put("vpbroadcastd", XMM_KZ | ZMM_KZ, REG32); 2846 #ifdef XBYAK64 2847 put("vpbroadcastq", XMM_KZ | ZMM_KZ, REG64); 2848 #endif 2849 { 2850 const char *tbl[] = { 2851 "vpbroadcastb", 2852 "vpbroadcastw", 2853 "vpbroadcastd", 2854 "vpbroadcastq", 2855 }; 2856 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2857 put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM); 2858 } 2859 } 2860 put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM); 2861 put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM); 2862 put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM); 2863 put("vbroadcasti32x8", ZMM_KZ, _MEM); 2864 put("vbroadcasti64x4", ZMM_KZ, _MEM); 2865 } 2866 void putAVX512_M_X() 2867 { 2868 const char *tbl[] = { 2869 "vmovapd", 2870 "vmovaps", 2871 "vmovupd", 2872 "vmovups", 2873 }; 2874 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2875 const char *name = tbl[i]; 2876 put(name, MEM|MEM_K, ZMM|XMM|YMM); 2877 put(name, ZMM, MEM); 2878 } 2879 } 2880 void put_vmov() 2881 { 2882 #ifdef XBYAK64 2883 put("vmovd", _XMM3, MEM|REG32); 2884 put("vmovd", MEM|REG32, _XMM3); 2885 put("vmovq", _XMM3, MEM|REG64|XMM); 2886 put("vmovq", MEM|REG64|XMM, _XMM3); 2887 put("vmovhlps", _XMM3, _XMM3, _XMM3); 2888 put("vmovlhps", _XMM3, _XMM3, _XMM3); 2889 put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM); 2890 put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM); 2891 put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM); 2892 put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM); 2893 2894 put("vmovsd", XMM_KZ, _XMM3, _XMM3); 2895 put("vmovsd", XMM_KZ, MEM); 2896 put("vmovsd", MEM_K, XMM); 2897 put("vmovss", XMM_KZ, _XMM3, _XMM3); 2898 put("vmovss", XMM_KZ, MEM); 2899 put("vmovss", MEM_K, XMM); 2900 2901 put("vmovshdup", _ZMM, _ZMM); 2902 put("vmovsldup", 
_ZMM, _ZMM); 2903 2904 2905 { 2906 const char *tbl[] = { 2907 "valignd", 2908 "valignq", 2909 }; 2910 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2911 const char *name = tbl[i]; 2912 put(name, XMM_KZ, _XMM, _XMM | MEM, IMM); 2913 put(name, _YMM3, _YMM3, _YMM3, IMM); 2914 put(name, _ZMM, _ZMM, _ZMM, IMM); 2915 } 2916 } 2917 { 2918 const char tbl[][16] = { 2919 "vmovhpd", 2920 "vmovhps", 2921 "vmovlpd", 2922 "vmovlps", 2923 }; 2924 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2925 put(tbl[i], _XMM3, _XMM3, MEM); 2926 put(tbl[i], MEM, _XMM3); 2927 } 2928 } 2929 #endif 2930 } 2931 void put512_X_XM() 2932 { 2933 const struct Tbl { 2934 const char *name; 2935 bool M_X; 2936 } tbl[] = { 2937 { "vmovddup", false }, 2938 { "vmovdqa32", true }, 2939 { "vmovdqa64", true }, 2940 { "vmovdqu8", true }, 2941 { "vmovdqu16", true }, 2942 { "vmovdqu32", true }, 2943 { "vmovdqu64", true }, 2944 { "vpabsb", false }, 2945 { "vpabsw", false }, 2946 { "vpabsd", false }, 2947 { "vpabsq", false }, 2948 }; 2949 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2950 const Tbl& p = tbl[i]; 2951 put(p.name, _XMM|XMM_KZ, _XMM|MEM); 2952 put(p.name, _YMM|YMM_KZ, _YMM|MEM); 2953 put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM); 2954 if (!p.M_X) continue; 2955 put(p.name, MEM, _XMM); 2956 put(p.name, MEM, _YMM); 2957 put(p.name, MEM, _ZMM); 2958 } 2959 put("vsqrtpd", XMM_KZ, M_1to2); 2960 put("vsqrtpd", YMM_KZ, M_1to4); 2961 put("vsqrtpd", ZMM_KZ, M_1to8); 2962 put("vsqrtpd", ZMM_KZ, ZMM_ER); 2963 2964 put("vsqrtps", XMM_KZ, M_1to4); 2965 put("vsqrtps", YMM_KZ, M_1to8); 2966 put("vsqrtps", ZMM_KZ, M_1to16); 2967 put("vsqrtps", ZMM_KZ, ZMM_ER); 2968 2969 put("vpabsd", ZMM_KZ, M_1to16); 2970 put("vpabsq", ZMM_KZ, M_1to8); 2971 2972 put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM); 2973 put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM); 2974 2975 put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM); 2976 put("vbroadcastf64x4", ZMM_KZ, _MEM); 2977 } 2978 void put512_X_X_XM() 2979 { 2980 const struct Tbl { 2981 
const char *name; 2982 uint64_t mem; 2983 } tbl[] = { 2984 { "vsqrtsd", MEM }, 2985 { "vsqrtss", MEM }, 2986 { "vunpckhpd", M_1to2 }, 2987 { "vunpckhps", M_1to4 }, 2988 { "vunpcklpd", M_1to2 }, 2989 { "vunpcklps", M_1to4 }, 2990 }; 2991 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 2992 const Tbl& p = tbl[i]; 2993 put(p.name, XMM_KZ, _XMM, _XMM|p.mem); 2994 } 2995 } 2996 void put512_X3() 2997 { 2998 #ifdef XBYAK64 2999 const struct Tbl { 3000 const char *name; 3001 uint64_t x1; 3002 uint64_t x2; 3003 uint64_t xm; 3004 } tbl[] = { 3005 { "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM }, 3006 { "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM }, 3007 { "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3008 3009 { "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 }, 3010 { "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 }, 3011 { "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 }, 3012 3013 { "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 }, 3014 { "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 }, 3015 { "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 }, 3016 3017 { "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM }, 3018 { "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM }, 3019 { "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3020 3021 { "vpaddb", XMM_KZ, _XMM, _XMM | _MEM }, 3022 { "vpaddw", XMM_KZ, _XMM, _XMM | _MEM }, 3023 { "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 }, 3024 { "vpaddq", ZMM_KZ, _ZMM, M_1to8 }, 3025 3026 { "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM }, 3027 { "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3028 3029 { "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM }, 3030 { "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3031 3032 { "vpaddusb", XMM_KZ, _XMM, _XMM | MEM }, 3033 { "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM }, 3034 3035 { "vpaddusw", XMM_KZ, _XMM, _XMM | MEM }, 3036 { "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM }, 3037 3038 { "vpsubb", XMM_KZ, _XMM, _XMM | _MEM }, 3039 { "vpsubw", XMM_KZ, _XMM, _XMM | _MEM }, 3040 { "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 }, 3041 { "vpsubq", ZMM_KZ, _ZMM, M_1to8 }, 3042 3043 { "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM }, 3044 { "vpsubsb", 
ZMM_KZ, _ZMM, _ZMM | _MEM }, 3045 3046 { "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM }, 3047 { "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3048 3049 { "vpsubusb", XMM_KZ, _XMM, _XMM | MEM }, 3050 { "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM }, 3051 3052 { "vpsubusw", XMM_KZ, _XMM, _XMM | MEM }, 3053 { "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM }, 3054 3055 { "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 }, 3056 { "vpandq", ZMM_KZ, _ZMM, _ZMM | M_1to8 }, 3057 3058 { "vpandnd", ZMM_KZ, _ZMM, _ZMM | M_1to16 }, 3059 { "vpandnq", ZMM_KZ, _ZMM, _ZMM | M_1to8 }, 3060 3061 { "vpavgb", ZMM_KZ, _ZMM, _ZMM }, 3062 { "vpavgw", ZMM_KZ, _ZMM, _ZMM }, 3063 3064 { "vpcmpeqb", K2, _ZMM, _ZMM | _MEM }, 3065 { "vpcmpeqw", K2, _ZMM, _ZMM | _MEM }, 3066 { "vpcmpeqd", K2, _ZMM, _ZMM | M_1to16 }, 3067 { "vpcmpeqq", K2, _ZMM, _ZMM | M_1to8 }, 3068 3069 { "vpcmpgtb", K2, _ZMM, _ZMM | _MEM }, 3070 { "vpcmpgtw", K2, _ZMM, _ZMM | _MEM }, 3071 { "vpcmpgtd", K2, _ZMM, _ZMM | M_1to16 }, 3072 { "vpcmpgtq", K2, _ZMM, _ZMM | M_1to8 }, 3073 3074 { "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3075 { "vpmaddwd", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3076 3077 { "vpmaxsb", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3078 { "vpmaxsw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3079 { "vpmaxsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 }, 3080 { "vpmaxsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 }, 3081 3082 { "vpmaxub", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3083 { "vpmaxuw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3084 { "vpmaxud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 }, 3085 { "vpmaxuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 }, 3086 3087 { "vpminsb", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3088 { "vpminsw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3089 { "vpminsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 }, 3090 { "vpminsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 }, 3091 3092 { "vpminub", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3093 { "vpminuw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3094 { "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 }, 3095 { "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 }, 3096 3097 { "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 }, 3098 { 
"vpslldq", _YMM3, _YMM3 | _MEM, IMM8 }, 3099 { "vpslldq", _ZMM, _ZMM | _MEM, IMM8 }, 3100 3101 { "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 }, 3102 { "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 }, 3103 { "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 }, 3104 3105 { "vpsraw", XMM_KZ, _XMM, IMM8 }, 3106 { "vpsraw", ZMM_KZ, _ZMM, IMM8 }, 3107 3108 { "vpsrad", XMM_KZ, _XMM | M_1to4, IMM8 }, 3109 { "vpsrad", ZMM_KZ, _ZMM | M_1to16, IMM8 }, 3110 3111 { "vpsraq", XMM, XMM, IMM8 }, 3112 { "vpsraq", XMM_KZ, _XMM | M_1to2, IMM8 }, 3113 { "vpsraq", ZMM_KZ, _ZMM | M_1to8, IMM8 }, 3114 3115 { "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 }, 3116 { "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 }, 3117 { "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 }, 3118 3119 { "vpsrlw", XMM_KZ, _XMM, IMM8 }, 3120 { "vpsrlw", ZMM_KZ, _ZMM, IMM8 }, 3121 3122 { "vpsrld", XMM_KZ, _XMM | M_1to4, IMM8 }, 3123 { "vpsrld", ZMM_KZ, _ZMM | M_1to16, IMM8 }, 3124 3125 { "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 }, 3126 { "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 }, 3127 3128 { "vpsravw", XMM_KZ | _XMM, _XMM, _XMM }, 3129 { "vpsravw", _ZMM, _ZMM, _MEM }, 3130 3131 { "vpsravd", XMM_KZ | _XMM, _XMM, _XMM }, 3132 { "vpsravd", _ZMM, _ZMM, M_1to16 }, 3133 3134 { "vpsravq", XMM_KZ | _XMM, _XMM, _XMM }, 3135 { "vpsravq", _ZMM, _ZMM, M_1to8 }, 3136 3137 { "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM }, 3138 { "vpsllvw", _ZMM, _ZMM, _MEM }, 3139 3140 { "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM }, 3141 { "vpsllvd", _ZMM, _ZMM, M_1to16 }, 3142 3143 { "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM }, 3144 { "vpsllvq", _ZMM, _ZMM, M_1to8 }, 3145 3146 { "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM }, 3147 { "vpsrlvw", _ZMM, _ZMM, _MEM }, 3148 3149 { "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM }, 3150 { "vpsrlvd", _ZMM, _ZMM, M_1to16 }, 3151 3152 { "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM }, 3153 { "vpsrlvq", _ZMM, _ZMM, M_1to8 }, 3154 3155 { "vpshufb", _XMM | XMM_KZ, _XMM, _XMM }, 3156 { "vpshufb", ZMM_KZ, _ZMM, _MEM }, 3157 3158 { "vpshufhw", _XMM | XMM_KZ, _XMM, IMM8 }, 3159 { "vpshufhw", 
ZMM_KZ, _MEM, IMM8 }, 3160 3161 { "vpshuflw", _XMM | XMM_KZ, _XMM, IMM8 }, 3162 { "vpshuflw", ZMM_KZ, _MEM, IMM8 }, 3163 3164 { "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4, IMM8 }, 3165 { "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16, IMM8 }, 3166 3167 { "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 }, 3168 { "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 }, 3169 3170 { "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 }, 3171 { "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 }, 3172 3173 { "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 }, 3174 { "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 }, 3175 3176 { "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 }, 3177 { "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 }, 3178 3179 { "vpsadbw", _XMM3, _XMM, _XMM }, 3180 { "vpsadbw", _ZMM, _ZMM, _MEM }, 3181 3182 { "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 }, 3183 { "vpmuldq", ZMM_KZ, _ZMM, M_1to8 }, 3184 3185 { "vpmulhrsw", _XMM3, _XMM, _XMM }, 3186 { "vpmulhrsw", ZMM_KZ, _ZMM, _MEM }, 3187 3188 { "vpmulhuw", _XMM3, _XMM, _XMM }, 3189 { "vpmulhuw", ZMM_KZ, _ZMM, _MEM }, 3190 3191 { "vpmulhw", _XMM3, _XMM, _XMM }, 3192 { "vpmulhw", ZMM_KZ, _ZMM, _MEM }, 3193 3194 { "vpmullw", _XMM3, _XMM, _XMM }, 3195 { "vpmullw", ZMM_KZ, _ZMM, _MEM }, 3196 3197 { "vpmulld", _XMM3, _XMM, M_1to4 }, 3198 { "vpmulld", ZMM_KZ, _ZMM, M_1to16 }, 3199 3200 { "vpmullq", _XMM3, _XMM, M_1to2 }, 3201 { "vpmullq", ZMM_KZ, _ZMM, M_1to8 }, 3202 3203 { "vpmuludq", _XMM3, _XMM, M_1to2 }, 3204 { "vpmuludq", ZMM_KZ, _ZMM, M_1to8 }, 3205 3206 { "vpunpckhbw", _XMM3, _XMM, _XMM }, 3207 { "vpunpckhbw", _ZMM, _ZMM, _MEM }, 3208 3209 { "vpunpckhwd", _XMM3, _XMM, _XMM }, 3210 { "vpunpckhwd", _ZMM, _ZMM, _MEM }, 3211 3212 { "vpunpckhdq", _XMM3, _XMM, M_1to4 }, 3213 { "vpunpckhdq", _ZMM, _ZMM, M_1to16 }, 3214 3215 { "vpunpckhqdq", _XMM3, _XMM, M_1to2 }, 3216 { "vpunpckhqdq", _ZMM, _ZMM, M_1to8 }, 3217 3218 { "vpunpcklbw", _XMM3, _XMM, _XMM }, 3219 { "vpunpcklbw", _ZMM, _ZMM, _MEM }, 3220 3221 { "vpunpcklwd", _XMM3, _XMM, _XMM }, 3222 { "vpunpcklwd", _ZMM, _ZMM, _MEM }, 3223 3224 
{ "vpunpckldq", _XMM3, _XMM, M_1to4 }, 3225 { "vpunpckldq", _ZMM, _ZMM, M_1to16 }, 3226 3227 { "vpunpcklqdq", _XMM3, _XMM, M_1to2 }, 3228 { "vpunpcklqdq", _ZMM, _ZMM, M_1to8 }, 3229 3230 { "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 }, 3231 { "vextractf64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 }, 3232 { "vextractf32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, 3233 { "vextractf64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, 3234 3235 { "vextracti32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 }, 3236 { "vextracti64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 }, 3237 { "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, 3238 { "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, 3239 3240 { "vextractps", REG32 | _MEM, _XMM3, IMM8 }, 3241 3242 { "vpermb", XMM_KZ, _XMM, _XMM }, 3243 { "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3244 3245 { "vpermw", XMM_KZ, _XMM, _XMM }, 3246 { "vpermw", ZMM_KZ, _ZMM, _ZMM | _MEM }, 3247 3248 { "vpermd", YMM_KZ, _YMM, _YMM | M_1to8 }, 3249 { "vpermd", ZMM_KZ, _ZMM, _ZMM | M_1to16 }, 3250 3251 { "vpermilpd", XMM_KZ, _XMM, _XMM | M_1to2 }, 3252 { "vpermilpd", ZMM_KZ, _ZMM, M_1to8 }, 3253 { "vpermilpd", XMM_KZ, M_1to2, IMM8 }, 3254 { "vpermilpd", ZMM_KZ, M_1to8, IMM8 }, 3255 3256 { "vpermilps", XMM_KZ, _XMM, _XMM | M_1to4 }, 3257 { "vpermilps", ZMM_KZ, _ZMM, M_1to16 }, 3258 { "vpermilps", XMM_KZ, M_1to4, IMM8 }, 3259 { "vpermilps", ZMM_KZ, M_1to16, IMM8 }, 3260 3261 { "vpermpd", YMM_KZ, _YMM | M_1to4, IMM8 }, 3262 { "vpermpd", ZMM_KZ, _ZMM | M_1to8, IMM8 }, 3263 { "vpermpd", YMM_KZ, _YMM, M_1to4 }, 3264 { "vpermpd", ZMM_KZ, _ZMM, M_1to8 }, 3265 3266 { "vpermps", YMM_KZ, _YMM, M_1to8 }, 3267 { "vpermps", ZMM_KZ, _ZMM, M_1to16 }, 3268 3269 { "vpermq", YMM_KZ, _YMM | M_1to4, IMM8 }, 3270 { "vpermq", ZMM_KZ, _ZMM | M_1to8, IMM8 }, 3271 { "vpermq", YMM_KZ, _YMM, M_1to4 }, 3272 { "vpermq", ZMM_KZ, _ZMM, M_1to8 }, 3273 }; 3274 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 3275 const Tbl& p = tbl[i]; 3276 put(p.name, p.x1, p.x2, p.xm); 
3277 } 3278 #endif 3279 } 3280 void put512_X3_I() 3281 { 3282 const struct Tbl { 3283 const char *name; 3284 uint64_t x1; 3285 uint64_t x2; 3286 uint64_t xm; 3287 } tbl[] = { 3288 #ifdef XBYAK64 3289 { "vinsertps", _XMM, _XMM, _XMM3 }, 3290 3291 { "vshufpd", XMM_KZ, _XMM, M_1to2 }, 3292 { "vshufpd", ZMM_KZ, _ZMM, M_1to8 }, 3293 3294 { "vshufps", XMM_KZ, _XMM, M_1to4 }, 3295 { "vshufps", ZMM_KZ, _ZMM, M_1to16 }, 3296 3297 { "vinsertf32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM }, 3298 { "vinsertf32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM }, 3299 3300 { "vinsertf64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM }, 3301 { "vinsertf64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM }, 3302 3303 { "vinsertf32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM }, 3304 { "vinsertf64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM }, 3305 3306 { "vinserti32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM }, 3307 { "vinserti32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM }, 3308 3309 { "vinserti64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM }, 3310 { "vinserti64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM }, 3311 3312 { "vinserti32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM }, 3313 { "vinserti64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM }, 3314 #endif 3315 { "vpalignr", ZMM_KZ, _ZMM, _ZMM }, 3316 }; 3317 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 3318 const Tbl& p = tbl[i]; 3319 put(p.name, p.x1, p.x2, p.xm, IMM8); 3320 } 3321 #ifdef XBYAK64 3322 put("vpextrb", _REG64, _XMM3, IMM8); 3323 put("vpextrw", _REG64|MEM, _XMM3, IMM8); 3324 put("vpextrd", _REG32, _XMM3, IMM8); 3325 put("vpextrq", _REG64, _XMM3, IMM8); 3326 put("vpinsrb", _XMM3, _XMM3, _REG32, IMM8); 3327 put("vpinsrw", _XMM3, _XMM3, _REG32, IMM8); 3328 put("vpinsrd", _XMM3, _XMM3, _REG32, IMM8); 3329 put("vpinsrq", _XMM3, _XMM3, _REG64, IMM8); 3330 #endif 3331 } 3332 void put512_FMA() 3333 { 3334 const struct Tbl { 3335 const char *name; 3336 bool supportYMM; 3337 } tbl[] = { 3338 { "vfmadd", true }, 3339 { "vfmadd", false }, 3340 { "vfmaddsub", true }, 3341 { "vfmsubadd", true }, 3342 { "vfmsub", true }, 
3343 { "vfmsub", false }, 3344 { "vfnmadd", true }, 3345 { "vfnmadd", false }, 3346 { "vfnmsub", true }, 3347 { "vfnmsub", false }, 3348 }; 3349 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 3350 const Tbl& p = tbl[i]; 3351 const struct Ord { 3352 const char *name; 3353 } ord[] = { 3354 { "132" }, 3355 { "213" }, 3356 { "231" }, 3357 }; 3358 for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) { 3359 const char sufTbl[][2][8] = { 3360 { "pd", "ps" }, 3361 { "sd", "ss" }, 3362 }; 3363 for (size_t k = 0; k < 2; k++) { 3364 const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k]; 3365 uint64_t mem = 0; 3366 if (suf == "pd") { 3367 mem = M_1to2; 3368 } else if (suf == "ps") { 3369 mem = M_1to4; 3370 } else { 3371 mem = XMM_ER; 3372 } 3373 std::string name = std::string(p.name) + ord[j].name + suf; 3374 const char *q = name.c_str(); 3375 put(q, XMM_KZ, _XMM, mem); 3376 if (!p.supportYMM) continue; 3377 if (suf == "pd") { 3378 mem = M_1to8; 3379 } else if (suf == "ps") { 3380 mem = M_1to16; 3381 } else { 3382 mem = XMM_ER; 3383 } 3384 put(q, _ZMM, _ZMM, mem); 3385 } 3386 } 3387 } 3388 } 3389 void put512_Y_XM() 3390 { 3391 const char *tbl[] = { 3392 "vpmovsxbw", 3393 "vpmovsxbd", 3394 "vpmovsxbq", 3395 "vpmovsxwd", 3396 "vpmovsxwq", 3397 "vpmovsxdq", 3398 "vpmovzxbw", 3399 "vpmovzxbd", 3400 "vpmovzxbq", 3401 "vpmovzxwd", 3402 "vpmovzxwq", 3403 "vpmovzxdq", 3404 }; 3405 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 3406 const char *name = tbl[i]; 3407 put(name, XMM_KZ, _XMM); 3408 put(name, _ZMM, _MEM); 3409 } 3410 } 3411 void put512_AVX1() 3412 { 3413 #ifdef XBYAK64 3414 const struct Tbl { 3415 std::string name; 3416 bool only_pd_ps; 3417 } tbl[] = { 3418 { "vadd", false }, 3419 { "vsub", false }, 3420 { "vmul", false }, 3421 { "vdiv", false }, 3422 { "vmax", false }, 3423 { "vmin", false }, 3424 { "vand", true }, 3425 { "vandn", true }, 3426 { "vor", true }, 3427 { "vxor", true }, 3428 }; 3429 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 3430 const struct Suf { 3431 
const char *suf; 3432 bool supportYMM; 3433 } sufTbl[] = { 3434 { "pd", true }, 3435 { "ps", true }, 3436 { "sd", false }, 3437 { "ss", false }, 3438 }; 3439 for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { 3440 if (tbl[i].only_pd_ps && j == 2) break; 3441 std::string suf = sufTbl[j].suf; 3442 std::string name = tbl[i].name + suf; 3443 const char *p = name.c_str(); 3444 uint64_t mem = 0; 3445 if (suf == "pd") { 3446 mem = M_1to2; 3447 } else if (suf == "ps") { 3448 mem = M_1to4; 3449 } 3450 put(p, _XMM3 | XMM_KZ, _XMM, mem); 3451 if (!sufTbl[j].supportYMM) continue; 3452 mem = 0; 3453 if (suf == "pd") { 3454 mem = M_1to8; 3455 } else if (suf == "ps") { 3456 mem = M_1to16; 3457 } 3458 put(p, _ZMM, _ZMM, mem); 3459 } 3460 } 3461 #endif 3462 } 3463 void put512_cvt() 3464 { 3465 #ifdef XBYAK64 3466 put("vcvtdq2pd", XMM_KZ, _XMM | M_1to2); 3467 put("vcvtdq2pd", YMM_KZ, _XMM | M_1to4); 3468 put("vcvtdq2pd", ZMM_KZ, _YMM | M_1to8); 3469 3470 put("vcvtdq2ps", XMM_KZ, _XMM | M_1to4); 3471 put("vcvtdq2ps", YMM_KZ, _YMM | M_1to8); 3472 put("vcvtdq2ps", ZMM_KZ, _ZMM | M_1to16); 3473 3474 put("vcvtpd2dq", XMM_KZ, _XMM | _YMM | M_1to2); 3475 put("vcvtpd2dq", YMM_KZ, _ZMM | ZMM_ER | M_1to8); 3476 #endif 3477 } 3478 void putMin() 3479 { 3480 #ifdef XBYAK64 3481 put("vcvtpd2dq", _XMM | _XMM3, _XMM | M_xword | M_1to2); 3482 put("vcvtpd2dq", _XMM | _XMM3, _YMM | M_yword | MY_1to4); 3483 #endif 3484 } 3485 void putAVX512() 3486 { 3487 #ifdef MIN_TEST 3488 putMin(); 3489 #else 3490 putOpmask(); 3491 separateFunc(); 3492 putCombi(); 3493 separateFunc(); 3494 putCmpK(); 3495 separateFunc(); 3496 putBroadcast(); 3497 separateFunc(); 3498 putAVX512_M_X(); 3499 separateFunc(); 3500 put_vmov(); 3501 separateFunc(); 3502 put512_X_XM(); 3503 separateFunc(); 3504 put512_X_X_XM(); 3505 separateFunc(); 3506 put512_X3(); 3507 separateFunc(); 3508 put512_X3_I(); 3509 separateFunc(); 3510 put512_FMA(); 3511 separateFunc(); 3512 put512_Y_XM(); 3513 separateFunc(); 3514 put512_AVX1(); 3515 
separateFunc(); 3516 put512_cvt(); 3517 #endif 3518 } 3519 #endif 3520 }; 3521 3522 int main(int argc, char *[]) 3523 { 3524 Test test(argc > 1); 3525 test.put(); 3526 }