/ externals / xbyak / test / make_nm.cpp
make_nm.cpp
   1  #include <stdio.h>
   2  #include "xbyak/xbyak.h"
   3  #include "xbyak/xbyak_bin2hex.h"
   4  #include <stdlib.h>
   5  #include <string.h>
   6  #include "cybozu/inttype.hpp"
   7  #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
   8  
   9  using namespace Xbyak;
  10  
// Number of bit positions scanned by the Test::put() loops (bits 0 .. bitEnd-1).
const int bitEnd = 64;

// Operand-kind flags.
// Every single-bit constant below names one kind of operand; Test::get() maps
// such a bit to operand text. Constants built with '|' are masks of several
// kinds, and Test::put() emits one line per set bit of each mask it is given.
const uint64_t MMX = 1ULL << 0;
const uint64_t _XMM = 1ULL << 1;
const uint64_t _MEM = 1ULL << 2;
const uint64_t _REG32 = 1ULL << 3;
const uint64_t EAX = 1ULL << 4;
const uint64_t IMM32 = 1ULL << 5;
const uint64_t IMM8 = 1ULL << 6;
const uint64_t _REG8 = 1ULL << 7;
const uint64_t _REG16 = 1ULL << 8;
const uint64_t NEG8 = 1ULL << 9;
const uint64_t IMM16 = 1ULL << 10;
const uint64_t NEG16 = 1ULL << 11;
const uint64_t AX = 1ULL << 12;
const uint64_t AL = 1ULL << 13;
const uint64_t IMM_1 = 1ULL << 14;
const uint64_t MEM8 = 1ULL << 15;
const uint64_t MEM16 = 1ULL << 16;
const uint64_t MEM32 = 1ULL << 17;
// bit 18 is assigned further down (MY_1to4)
const uint64_t ONE = 1ULL << 19;
const uint64_t CL = 1ULL << 20;
const uint64_t MEM_ONLY_DISP = 1ULL << 21;
// bit 22 is assigned further down (BNDREG)
const uint64_t NEG32 = 1ULL << 23;
const uint64_t _YMM = 1ULL << 24;
const uint64_t VM32X_32 = 1ULL << 39;
const uint64_t VM32X_64 = 1ULL << 40;
const uint64_t VM32Y_32 = 1ULL << 41;
const uint64_t VM32Y_64 = 1ULL << 42;
#ifdef XBYAK64
// Kinds that exist only when XBYAK64 is defined.
const uint64_t _MEMe = 1ULL << 25;
const uint64_t REG32_2 = 1ULL << 26; // r8d, ...
const uint64_t REG16_2 = 1ULL << 27; // r8w, ...
const uint64_t REG8_2 = 1ULL << 28; // r8b, ...
const uint64_t REG8_3 = 1ULL << 29; // spl, ...
const uint64_t _REG64 = 1ULL << 30; // rax, ...
const uint64_t _REG64_2 = 1ULL << 31; // r8, ...
const uint64_t RAX = 1ULL << 32;
const uint64_t _XMM2 = 1ULL << 33;
const uint64_t _YMM2 = 1ULL << 34;
const uint64_t VM32X = VM32X_32 | VM32X_64;
const uint64_t VM32Y = VM32Y_32 | VM32Y_64;
#else
// Without XBYAK64 the same names are 0, so they add no bit to the masks below
// and a mask made only of them produces no output from Test::put().
const uint64_t _MEMe = 0;
const uint64_t REG32_2 = 0;
const uint64_t REG16_2 = 0;
const uint64_t REG8_2 = 0;
const uint64_t REG8_3 = 0;
const uint64_t _REG64 = 0;
const uint64_t _REG64_2 = 0;
const uint64_t RAX = 0;
const uint64_t _XMM2 = 0;
const uint64_t _YMM2 = 0;
const uint64_t VM32X = VM32X_32;
const uint64_t VM32Y = VM32Y_32;
#endif
// Composite masks: union of the individual kinds for one register/memory class.
const uint64_t REG64 = _REG64 | _REG64_2 | RAX;
const uint64_t REG32 = _REG32 | REG32_2 | EAX;
const uint64_t REG16 = _REG16 | REG16_2 | AX;
const uint64_t REG32e = REG32 | REG64;
const uint64_t REG8 = _REG8 | REG8_2|AL; // note: REG8_3 is not part of REG8
const uint64_t MEM = _MEM | _MEMe;
const uint64_t MEM64 = 1ULL << 35;
const uint64_t ST0 = 1ULL << 36;
const uint64_t STi = 1ULL << 37;
const uint64_t IMM_2 = 1ULL << 38;
const uint64_t IMM = IMM_1 | IMM_2;
const uint64_t XMM = _XMM | _XMM2;
const uint64_t YMM = _YMM | _YMM2;
const uint64_t K = 1ULL << 43;
const uint64_t _ZMM = 1ULL << 44;
const uint64_t _ZMM2 = 1ULL << 45;
#ifdef XBYAK64
const uint64_t ZMM = _ZMM | _ZMM2;
const uint64_t _YMM3 = 1ULL << 46;
#else
const uint64_t ZMM = _ZMM;
const uint64_t _YMM3 = 0;
#endif
const uint64_t K2 = 1ULL << 47;
const uint64_t ZMM_SAE = 1ULL << 48;
const uint64_t ZMM_ER = 1ULL << 49;
#ifdef XBYAK64
// _XMM3 has no fallback definition; it is declared only when XBYAK64 is defined.
const uint64_t _XMM3 = 1ULL << 50;
#endif
const uint64_t XMM_SAE = 1ULL << 51;
#ifdef XBYAK64
const uint64_t XMM_KZ = 1ULL << 52;
const uint64_t YMM_KZ = 1ULL << 53;
const uint64_t ZMM_KZ = 1ULL << 54;
#else
const uint64_t XMM_KZ = 0;
const uint64_t YMM_KZ = 0;
const uint64_t ZMM_KZ = 0;
#endif
const uint64_t MEM_K = 1ULL << 55;
const uint64_t M_1to2 = 1ULL << 56;
const uint64_t M_1to4 = 1ULL << 57;
const uint64_t M_1to8 = 1ULL << 58;
const uint64_t M_1to16 = 1ULL << 59;
const uint64_t XMM_ER = 1ULL << 60;
const uint64_t M_xword = 1ULL << 61;
const uint64_t M_yword = 1ULL << 62;
// The next two reuse bit positions left unassigned in the sequence above.
const uint64_t MY_1to4 = 1ULL << 18;
const uint64_t BNDREG = 1ULL << 22;

// Highest bit: "no operand in this position". Test::put() uses it as the
// default for omitted operands and prints nothing for an operand whose mask
// has this bit set.
const uint64_t NOPARA = 1ULL << (bitEnd - 1);
 118  
 119  class Test {
 120  	Test(const Test&);
 121  	void operator=(const Test&);
 122  	const bool isXbyak_;
 123  	int funcNum_;
 124  	/*
 125  		and_, or_, xor_, not_ => and, or, xor, not
 126  	*/
 127  	std::string removeUnderScore(std::string s) const
 128  	{
 129  		if (!isXbyak_ && s[s.size() - 1] == '_') s.resize(s.size() - 1);
 130  		return s;
 131  	}
 132  
 133  	// check all op1, op2, op3
 134  	void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const
 135  	{
 136  		for (int i = 0; i < bitEnd; i++) {
 137  			if ((op1 & (1ULL << i)) == 0) continue;
 138  			for (int j = 0; j < bitEnd; j++) {
 139  				if ((op2 & (1ULL << j)) == 0) continue;
 140  				for (int k = 0; k < bitEnd; k++) {
 141  					if ((op3 & (1ULL << k)) == 0) continue;
 142  					for (int s = 0; s < bitEnd; s++) {
 143  						if ((op4 & (1ULL << s)) == 0) continue;
 144  						printf("%s ", nm.c_str());
 145  						if (isXbyak_) printf("(");
 146  						if (!(op1 & NOPARA)) printf("%s", get(1ULL << i));
 147  						if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j));
 148  						if (!(op3 & NOPARA)) printf(", %s", get(1ULL << k));
 149  						if (!(op4 & NOPARA)) printf(", %s", get(1ULL << s));
 150  						if (isXbyak_) printf("); dump();");
 151  						printf("\n");
 152  					}
 153  				}
 154  			}
 155  		}
 156  	}
 157  	void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const
 158  	{
 159  		for (int i = 0; i < bitEnd; i++) {
 160  			if ((op & (1ULL << i)) == 0) continue;
 161  			printf("%s ", nm);
 162  			if (isXbyak_) printf("(");
 163  			if (!(op & NOPARA)) printf("%s", get(1ULL << i));
 164  			printf(", %s", isXbyak_ ? xbyak : nasm);
 165  			if (isXbyak_) printf("); dump();");
 166  			printf("\n");
 167  		}
 168  	}
 169  	void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const
 170  	{
 171  		if (nasm == 0) nasm = xbyak;
 172  		for (int i = 0; i < bitEnd; i++) {
 173  			if ((op & (1ULL << i)) == 0) continue;
 174  			printf("%s ", nm);
 175  			if (isXbyak_) printf("(");
 176  			printf("%s ", isXbyak_ ? xbyak : nasm);
 177  			if (!(op & NOPARA)) printf(", %s", get(1ULL << i));
 178  			if (isXbyak_) printf("); dump();");
 179  			printf("\n");
 180  		}
 181  	}
 182  	void put(const char *nm, const char *para1, uint64_t op2, const char *para3) const
 183  	{
 184  		for (int j = 0; j < bitEnd; j++) {
 185  			if ((op2 & (1ULL << j)) == 0) continue;
 186  			printf("%s ", nm);
 187  			if (isXbyak_) printf("(");
 188  			printf("%s", para1);
 189  			if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j));
 190  			printf(", %s", para3);
 191  			if (isXbyak_) printf("); dump();");
 192  			printf("\n");
 193  		}
 194  	}
	// Returns the operand text for one operand-kind flag (`type` is expected to
	// be a single bit of the masks defined at file scope; put() passes 1ULL << i).
	// Register kinds pick a name from a small table using rand(); most other
	// kinds return a fixed string, which for many kinds differs between Xbyak
	// call syntax (isXbyak_ true) and assembler syntax (isXbyak_ false).
	// Returns 0 when `type` matches none of the cases.
	const char *get(uint64_t type) const
	{
		// table index in 0..7, drawn from rand() on every call
		int idx = (rand() / 31) & 7;
		if (type == ST0) {
			return "st0";
		}
		if (type == STi) {
			return "st2";
		}
		switch (type) {
		case MMX:
			{
				static const char MmxTbl[][4] = {
					"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
				};
				return MmxTbl[idx];
			}
		case _XMM:
			{
				static const char tbl[][6] = {
					"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
				};
				return tbl[idx];
			}
		case _YMM:
			{
				static const char tbl[][6] = {
					"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7"
				};
				return tbl[idx];
			}
		case _ZMM:
			{
				static const char tbl[][6] = {
					"zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7"
				};
				return tbl[idx];
			}
#ifdef XBYAK64
		case _XMM2:
			{
				static const char tbl[][6] = {
					"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
				};
				return tbl[idx];
			}
		case _XMM3:
			{
				static const char tbl[][6] = {
					"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23"
				};
				return tbl[idx];
			}
		case _YMM2:
			{
				static const char tbl[][6] = {
					"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
				};
				return tbl[idx];
			}
		case _YMM3:
			{
				static const char tbl[][6] = {
					"ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
				};
				return tbl[idx];
			}
		case _ZMM2:
			{
				// mixes zmm8..zmm11 with zmm28..zmm31
				static const char tbl[][6] = {
					"zmm8", "zmm9", "zmm10", "zmm11", "zmm28", "zmm29", "zmm30", "zmm31",
				};
				return tbl[idx];
			}
#endif
		case _MEM:
			{
				// a single fixed address; the randomized variants below are disabled
				return isXbyak_ ? "ptr[eax+ecx+3]" : "[eax+ecx+3]"; // QQQ : disp8N
/*
				idx %= 5;
				switch (idx) {
				case 0: return isXbyak_ ? "ptr[eax+ecx]" : "[eax+ecx]";
				case 1:	return isXbyak_ ? "ptr[eax+ecx+1]" : "[eax+ecx+1]";
				case 2:	return isXbyak_ ? "ptr[eax+ecx+16]" : "[eax+ecx+16]";
				case 3:	return isXbyak_ ? "ptr[eax+ecx+32]" : "[eax+ecx+32]";
				case 4:	return isXbyak_ ? "ptr[eax+ecx+48]" : "[eax+ecx+48]";
				}
*/
			}
		case _MEMe:
			{
				// ccc starts at 1 and only changes when USE_YASM is defined, so
				// without USE_YASM the rdx+r15 form is always returned; with
				// USE_YASM successive calls alternate between the two forms.
				static int ccc = 1;
#ifdef USE_YASM
				ccc++;
#endif
				if (ccc & 1) {
					return isXbyak_ ? "ptr[rdx+r15+0x12]" : "[rdx+r15+0x12]";
				} else {
					return isXbyak_ ? "ptr[rip - 0x13456+1-3]" : "[rip - 0x13456+1-3]";
				}
			}
		case MEM8:
			return "byte [eax+edx]";
		case MEM16:
			return "word [esi]";
		case MEM32:
			return "dword [ebp*2]";
		case MEM64:
			return "qword [eax+ecx*8]";
		case MEM_ONLY_DISP:
			return isXbyak_ ? "ptr[(void*)0x123]" : "[0x123]";
		case _REG16: // not ax
			{
				static const char Reg16Tbl[][4] = {
					"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"
				};
				// index 1..7: entry 0 ("ax") is never selected
				return Reg16Tbl[(idx % 7) + 1];
			}
		case _REG8: // not al
			{
				static const char Reg8Tbl[][4] = {
#ifdef XBYAK64 // QQQ
					"al", "cl", "dl", "bl", "al", "cl", "dl", "bl"
#else
					"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"
#endif
				};
				// index 1..7 skips entry 0.
				// NOTE(review): in the XBYAK64 table entry 4 is also "al", so "al"
				// can still be returned here in that configuration.
				return Reg8Tbl[(idx % 7) + 1];
			}
		case _REG32: // not eax
			{
				static const char Reg32Tbl[][4] = {
					"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"
				};
				// index 1..7: entry 0 ("eax") is never selected
				return Reg32Tbl[(idx % 7) + 1];
			}
#ifdef XBYAK64
		case _REG64: // not rax
			{
				static const char Reg64Tbl[][4] = {
					"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"
				};
				// index 1..7: entry 0 ("rax") is never selected
				return Reg64Tbl[(idx % 7) + 1];
			}
		case _REG64_2:
			{
				static const char Reg64_2Tbl[][4] = {
					"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
				};
				return Reg64_2Tbl[idx];
			}
		case REG32_2:
			{
				static const char Reg32eTbl[][5] = {
					"r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
				};
				return Reg32eTbl[idx];
			}
		case REG16_2:
			{
				static const char Reg16eTbl[][5] = {
					"r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
				};
				return Reg16eTbl[idx];
			}
		case REG8_2:
			{
				static const char Reg8_2Tbl[][5] = {
					"r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
				};
				return Reg8_2Tbl[idx];
			}
		case REG8_3:
			{
				// four names, each listed twice so that any idx in 0..7 is valid
				static const char Reg8_3Tbl[][5] = {
					"spl", "bpl", "sil", "dil", "spl", "bpl", "sil", "dil"
				};
				return Reg8_3Tbl[idx];
			}
		case RAX:
			return "rax";
#endif
		case EAX:
			return "eax";
		case AX:
			return "ax";
		case AL:
			return "al";
		case CL:
			return "cl";
		case ONE:
			return "1";
		// Immediates: the assembler form carries an explicit size keyword,
		// the Xbyak form is the bare number.
		case IMM32:
			return isXbyak_ ? "12345678" : "dword 12345678";
		case IMM16:
			return isXbyak_ ? "1000" : "word 1000";
		case IMM8:
			return isXbyak_ ? "4" : "byte 4";
		case NEG8:
			return isXbyak_ ? "-30" : "byte -30";
		case NEG16:
			return isXbyak_ ? "-1000" : "word -1000";
		case NEG32:
			return isXbyak_ ? "-100000" : "dword -100000";
		case IMM_1:
			return "4";
		case IMM_2:
			// both branches are the same string
			return isXbyak_ ? "0xda" : "0xda";
		case VM32X_32:
			return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
		case VM32X_64:
			return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
		case VM32Y_32:
			return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
		case VM32Y_64:
			return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
		// Broadcast memory operands: the Xbyak text is the same for every
		// width, only the assembler text spells out {1toN}.
		case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
		case M_1to4: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to4}";
		case M_1to8: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to8}";
		case M_1to16: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to16}";

		case M_xword: return isXbyak_ ? "ptr [eax+33]" : "oword [eax+33]";
		// both branches are the same string
		case M_yword: return isXbyak_ ? "yword [eax+33]" : "yword [eax+33]";
		case MY_1to4: return isXbyak_ ? "yword_b [eax+32]" : "[eax+32]{1to4}";
		case K:
			{
				// seven entries (k1..k7), hence idx % 7
				static const char kTbl[][5] = {
					"k1", "k2", "k3", "k4", "k5", "k6", "k7",
				};
				return kTbl[idx % 7];
			}
		case K2:
			return isXbyak_ ? "k3 | k5" : "k3{k5}";
		case BNDREG:
			{
				// four entries, hence idx % 4
				static const char tbl[][5] = {
					"bnd0", "bnd1", "bnd2", "bnd3",
				};
				return tbl[idx % 4];
			}
#ifdef XBYAK64
		case XMM_SAE:
			return isXbyak_ ? "xmm25 | T_sae" : "xmm25, {sae}";
		case ZMM_SAE:
			return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
		case XMM_ER:
			return isXbyak_ ? "xmm4 | T_rd_sae" : "xmm4, {rd-sae}";
		case ZMM_ER:
			return isXbyak_ ? "zmm20 | T_rd_sae" : "zmm20, {rd-sae}";
		case XMM_KZ:
			return isXbyak_ ? "xmm5 | k5" : "xmm5{k5}";
		case YMM_KZ:
			return isXbyak_ ? "ymm2 |k3|T_z" : "ymm2{k3}{z}";
		case ZMM_KZ:
			return isXbyak_ ? "zmm7|k1" : "zmm7{k1}";
		case MEM_K:
			return isXbyak_ ? "ptr [rax] | k1" : "[rax]{k1}";
#else
		case XMM_SAE:
			return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
		case ZMM_SAE:
			return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
		case XMM_ER:
			// NOTE(review): uses xmm30 in the non-XBYAK64 branch, while the other
			// cases here use low register numbers - confirm this is intended.
			return isXbyak_ ? "xmm30 | T_rd_sae" : "xmm30, {rd-sae}";
		case ZMM_ER:
			return isXbyak_ ? "zmm2 | T_rd_sae" : "zmm2, {rd-sae}";
		case MEM_K:
			return isXbyak_ ? "ptr [eax] | k1" : "[eax]{k1}";
#endif
		}
		// no case matched
		return 0;
	}
 467  	void putSIMPLE() const
 468  	{
 469  		const char tbl[][20] = {
 470  #ifdef XBYAK64
 471  			"cdqe",
 472  			"cqo",
 473  			"cmpsq",
 474  			"movsq",
 475  			"popfq",
 476  			"pushfq",
 477  			"lodsq",
 478  			"movsq",
 479  			"scasq",
 480  			"stosq",
 481  			"syscall",
 482  			"sysret",
 483  #else
 484  			"aaa",
 485  			"aad",
 486  			"aam",
 487  			"aas",
 488  			"daa",
 489  			"das",
 490  			"into",
 491  			"popad",
 492  			"popfd",
 493  			"pusha",
 494  			"pushad",
 495  			"pushfd",
 496  			"popa",
 497  #endif
 498  
 499  			"cbw",
 500  			"cdq",
 501  			"clc",
 502  			"cld",
 503  			"cli",
 504  			"cmc",
 505  
 506  			"cpuid",
 507  			"cwd",
 508  			"cwde",
 509  
 510  			"lahf",
 511  //			"lock",
 512  			"cmpsb",
 513  			"cmpsw",
 514  			"cmpsd",
 515  			"hlt",
 516  			"int3",
 517  			"leave",
 518  			"lodsb",
 519  			"lodsw",
 520  			"lodsd",
 521  			"movsb",
 522  			"movsw",
 523  			"movsd",
 524  			"outsb",
 525  			"outsw",
 526  			"outsd",
 527  			"scasb",
 528  			"scasw",
 529  			"scasd",
 530  			"stosb",
 531  			"stosw",
 532  			"stosd",
 533  			"nop",
 534  
 535  			"sahf",
 536  			"serialize",
 537  			"stc",
 538  			"std",
 539  			"sti",
 540  			"sysenter",
 541  			"sysexit",
 542  
 543  			"emms",
 544  			"pause",
 545  			"sfence",
 546  			"lfence",
 547  			"mfence",
 548  			"monitor",
 549  			"mwait",
 550  
 551  			"rdmsr",
 552  			"rdpmc",
 553  			"rdtsc",
 554  			"rdtscp",
 555  			"ud2",
 556  			"wait",
 557  			"fwait",
 558  			"wbinvd",
 559  			"wrmsr",
 560  			"xlatb",
 561  			"xend",
 562  
 563  			"popf",
 564  			"pushf",
 565  			"stac",
 566  
 567  			"xgetbv",
 568  			"vzeroall",
 569  			"vzeroupper",
 570  
 571  			"f2xm1",
 572  			"fabs",
 573  			"faddp",
 574  			"fchs",
 575  			"fclex",
 576  			"fnclex",
 577  			"fcom",
 578  			"fcomp",
 579  			"fcompp",
 580  			"fcos",
 581  			"fdecstp",
 582  			"fdivp",
 583  			"fdivrp",
 584  			"fincstp",
 585  			"finit",
 586  			"fninit",
 587  			"fld1",
 588  			"fldl2t",
 589  			"fldl2e",
 590  			"fldpi",
 591  			"fldlg2",
 592  			"fldln2",
 593  			"fldz",
 594  			"fmulp",
 595  			"fnop",
 596  			"fpatan",
 597  			"fprem",
 598  			"fprem1",
 599  			"fptan",
 600  			"frndint",
 601  			"fscale",
 602  			"fsin",
 603  			"fsincos",
 604  			"fsqrt",
 605  			"fsubp",
 606  			"fsubrp",
 607  			"ftst",
 608  			"fucom",
 609  			"fucomp",
 610  			"fucompp",
 611  			"fxam",
 612  			"fxch",
 613  			"fxtract",
 614  			"fyl2x",
 615  			"fyl2xp1",
 616  
 617  			"monitorx",
 618  			"mwaitx",
 619  			"clzero",
 620  		};
 621  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 622  			put(tbl[i]);
 623  		}
 624  		{
 625  			const char memTbl[][16] = {
 626  				"clflush",
 627  				"clflushopt",
 628  				"fbld",
 629  				"fbstp",
 630  				"fldcw",
 631  				"fldenv",
 632  				"frstor",
 633  				"fsave",
 634  				"fnsave",
 635  				"fstcw",
 636  				"fnstcw",
 637  				"fstenv",
 638  				"fnstenv",
 639  				"fstsw",
 640  				"fnstsw",
 641  				"fxrstor",
 642  				"clwb",
 643  			};
 644  			for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) {
 645  				put(memTbl[i], MEM);
 646  			}
 647  			put("fstsw", AX);
 648  			put("fnstsw", AX);
 649  		}
 650  
 651  		put("bswap", REG32e);
 652  		put("lea", REG32e|REG16, MEM);
 653  		put("enter", IMM, IMM);
 654  		put(isXbyak_ ? "int_" : "int", IMM8);
 655  		put(isXbyak_ ? "in_" : "in", AL|AX|EAX, IMM8);
 656  		puts(isXbyak_ ? "in_(al, dx); dump();" : "in al, dx");
 657  		puts(isXbyak_ ? "in_(ax, dx); dump();" : "in ax, dx");
 658  		puts(isXbyak_ ? "in_(eax, dx); dump();" : "in eax, dx");
 659  		put(isXbyak_ ? "out_" : "out", IMM8, AL|AX|EAX);
 660  		puts(isXbyak_ ? "out_(dx, al); dump();" : "out dx, al");
 661  		puts(isXbyak_ ? "out_(dx, ax); dump();" : "out dx, ax");
 662  		puts(isXbyak_ ? "out_(dx, eax); dump();" : "out dx, eax");
 663  		puts(isXbyak_ ? "lea(eax, ptr[edi + 4 * eax]); dump();" : "lea eax, [edi + 4 * eax]");
 664  	}
 665  	void putJmp() const
 666  	{
 667  #ifdef XBYAK64
 668  		put("jmp", REG64);
 669  		put("call", REG64);
 670  #else
 671  		put("jmp", REG32);
 672  		put("call", REG16|REG32);
 673  #endif
 674  		put("jmp", MEM);
 675  		put("jmp", MEM);
 676  		put("jmp", MEM);
 677  		put("call", MEM|MEM_ONLY_DISP);
 678  #ifndef USE_YASM
 679  		// call(ptr [getCode() + 5]); means to construct the opecode of "call"
 680  		// after calling getCode().
 681  		// Its behavior is same as NASM(MASM). YASM makes different opecode.
 682  		put("call", "getCode() + 5", "$ + 5");
 683  #endif
 684  
 685  #ifdef XBYAK64
 686  		put("jmp", "ptr[(void*)0x12345678]", "[0x12345678]");
 687  		put("call", "ptr[(void*)0x12345678]", "[0x12345678]");
 688  #ifdef USE_YASM
 689  		put("jmp", "ptr[rip + 0x12345678]", "[rip+0x12345678]");
 690  		put("call", "ptr[rip + 0x12345678]", "[rip+0x12345678]");
 691  		put("call", "ptr[rip -23]", "[rip-23]");
 692  		put("call", "ptr[rip -23+56]", "[rip-23+56]");
 693  #else
 694  		// bug of yasm?
 695  		if (isXbyak_) {
 696  			puts("{ Label label0;");
 697  			puts("L(label0);");
 698  			puts("pshufb (xmm14, ptr [rip+label0]); dump();");
 699  			puts("}");
 700  		} else {
 701  			puts("label0:");
 702  			puts("pshufb xmm14, [rel label0]");
 703  		}
 704  #endif
 705  #endif
 706  	}
 707  	void putFarJmp() const
 708  	{
 709  #ifdef XBYAK64
 710  		put("jmp", "word[rax],T_FAR", "far word [rax]");
 711  		put("jmp", "dword[rax],T_FAR", "far dword [rax]");
 712  		put("jmp", "qword[rax],T_FAR", "far qword [rax]");
 713  
 714  		put("call", "word[rax],T_FAR", "far word [rax]");
 715  		put("call", "dword[rax],T_FAR", "far dword [rax]");
 716  		put("call", "qword[rax],T_FAR", "far qword [rax]");
 717  #else
 718  		put("jmp", "dword[eax],T_FAR", "far dword [eax]");
 719  		put("jmp", "word[eax],T_FAR", "far word [eax]");
 720  
 721  		put("call", "dword[eax],T_FAR", "far dword [eax]");
 722  		put("call", "word[eax],T_FAR", "far word [eax]");
 723  #endif
 724  	}
 725  	void putMMX1() const
 726  	{
 727  		// emms etc
 728  		put("ldmxcsr", MEM);
 729  		put("movmskps", REG32e, XMM);
 730  		put("movmskpd", REG32e, XMM);
 731  		put("stmxcsr", MEM);
 732  		put("maskmovq", MMX, MMX);
 733  		put("movntps", MEM, XMM);
 734  		put("movntq", MEM, MMX);
 735  		put("prefetcht0", MEM);
 736  		put("prefetcht1", MEM);
 737  		put("prefetcht2", MEM);
 738  		put("prefetchnta", MEM);
 739  		put("prefetchwt1", MEM);
 740  		put("prefetchw", MEM);
 741  
 742  		// SSE2 misc
 743  		put("maskmovdqu", XMM, XMM);
 744  		put("movntpd", MEM, XMM);
 745  		put("movntdq", MEM, XMM);
 746  		put("movnti", MEM, REG32); // QQQ:REG32e?
 747  
 748  		put("movhlps", XMM, XMM);
 749  		put("movlhps", XMM, XMM);
 750  
 751  		// movd for MMX, XMM
 752  		put("movd", MEM|MEM32|REG32, MMX|XMM);
 753  		put("movd", MMX|XMM, MEM|REG32|MEM32);
 754  
 755  		// movq for MMX
 756  		put("movq", MMX, MMX|MEM);
 757  		put("movq", MEM, MMX);
 758  		// movq for XMM
 759  		put("movq", XMM, XMM|MEM);
 760  		put("movq", MEM, XMM);
 761  		put("movq", XMM|MMX, "qword[eax]", "qword[eax]");
 762  		put("movq", XMM|MMX, "ptr[eax]", "qword[eax]");
 763  		put("movq", "qword[eax]", "qword[eax]", XMM|MMX);
 764  		put("movq", "ptr[eax]", "qword[eax]", XMM|MMX);
 765  #ifdef XBYAK64
 766  		put("movq", REG64, XMM|MMX);
 767  		put("movq", XMM|MMX, REG64);
 768  #endif
 769  
 770  		// SSE3 int
 771  		put("lddqu", XMM, MEM);
 772  	}
 773  	void putMMX2() const
 774  	{
 775  		static const char nmTbl[][16] = {
 776  			// MMX
 777  			"packssdw",
 778  			"packsswb",
 779  			"packuswb",
 780  			"pand",
 781  			"pandn",
 782  			"pmaddwd",
 783  			"pmulhuw",
 784  			"pmulhw",
 785  			"pmullw",
 786  			"por",
 787  			"punpckhbw",
 788  			"punpckhwd",
 789  			"punpckhdq",
 790  			"punpcklbw",
 791  			"punpcklwd",
 792  			"punpckldq",
 793  			"pxor",
 794  			"paddb",
 795  			"paddw",
 796  			"paddd",
 797  			"paddsb",
 798  			"paddsw",
 799  			"paddusb",
 800  			"paddusw",
 801  			"pcmpeqb",
 802  			"pcmpeqw",
 803  			"pcmpeqd",
 804  			"pcmpgtb",
 805  			"pcmpgtw",
 806  			"pcmpgtd",
 807  			"psllw",
 808  			"pslld",
 809  			"psllq",
 810  			"psraw",
 811  			"psrad",
 812  			"psrlw",
 813  			"psrld",
 814  			"psrlq",
 815  			"psubb",
 816  			"psubw",
 817  			"psubd",
 818  			"psubsb",
 819  			"psubsw",
 820  			"psubusb",
 821  			"psubusw",
 822  			// MMX2
 823  			"pavgb",
 824  			"pavgw",
 825  			"pmaxsw",
 826  			"pmaxub",
 827  			"pminsw",
 828  			"pminub",
 829  			"psadbw",
 830  			//
 831  			"paddq",
 832  			"pmuludq",
 833  			"psubq",
 834  		};
 835  		for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
 836  			put(nmTbl[i], MMX, MMX|MEM);
 837  			put(nmTbl[i], XMM, XMM|MEM);
 838  		}
 839  	}
 840  	void putMMX3() const
 841  	{
 842  		static const char nmTbl[][16] = {
 843  			"psllw",
 844  			"pslld",
 845  			"psllq",
 846  			"psraw",
 847  			"psrad",
 848  			"psrlw",
 849  			"psrld",
 850  			"psrlq",
 851  		};
 852  		for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
 853  			put(nmTbl[i], MMX|XMM, IMM);
 854  		}
 855  		put("pslldq", XMM, IMM);
 856  		put("psrldq", XMM, IMM);
 857  		put("pmovmskb", REG32, MMX|XMM); // QQQ
 858  		put("pextrw", REG32, MMX|XMM, IMM); // QQQ
 859  		put("pinsrw", MMX|XMM, REG32|MEM, IMM); // QQQ
 860  	}
 861  	void putMMX4() const
 862  	{
 863  		put("pshufw", MMX, MMX|MEM, IMM);
 864  		put("pshuflw", XMM, XMM|MEM, IMM);
 865  		put("pshufhw", XMM, XMM|MEM, IMM);
 866  		put("pshufd", XMM, XMM|MEM, IMM);
 867  	}
 868  	void putMMX5() const
 869  	{
 870  		static const char nmTbl[][16] = {
 871  			"movdqa",
 872  			"movdqu",
 873  			"movaps",
 874  			"movss",
 875  			"movups",
 876  			"movapd",
 877  			"movsd",
 878  			"movupd",
 879  		};
 880  		for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
 881  			put(nmTbl[i], XMM, XMM|MEM);
 882  			put(nmTbl[i], MEM, XMM);
 883  		}
 884  		put("movq2dq", XMM, MMX);
 885  		put("movdq2q", MMX, XMM);
 886  	}
 887  
 888  	void putXMM1() const
 889  	{
 890  		enum {
 891  			PS = 1 << 0,
 892  			SS = 1 << 1,
 893  			PD = 1 << 2,
 894  			SD = 1 << 3
 895  		};
 896  		const struct {
 897  			uint8_t code;
 898  			const char *name;
 899  		} sufTbl[] = {
 900  			{ 0, "ps" },
 901  			{ 0xF3, "ss" },
 902  			{ 0x66, "pd" },
 903  			{ 0xF2, "sd" },
 904  		};
 905  		static const struct XmmTbl1 {
 906  			uint8_t code;
 907  			int mode;
 908  			const char *name;
 909  			bool hasImm;
 910  		} xmmTbl1[] = {
 911  			{ B01011000, PS|SS|PD|SD, "add", false },
 912  			{ B01010101, PS|PD      , "andn", false },
 913  			{ B01010100, PS|PD      , "and", false },
 914  			{ B11000010, PS|SS|PD|SD, "cmp", true },
 915  			{ B01011110, PS|SS|PD|SD, "div", false },
 916  			{ B01011111, PS|SS|PD|SD, "max", false },
 917  			{ B01011101, PS|SS|PD|SD, "min", false },
 918  			{ B01011001, PS|SS|PD|SD, "mul", false },
 919  			{ B01010110, PS|PD      , "or", false },
 920  			{ B01010011, PS|SS      , "rcp", false },
 921  			{ B01010010, PS|SS      , "rsqrt", false },
 922  			{ B11000110, PS|PD      , "shuf", true },
 923  			{ B01010001, PS|SS|PD|SD, "sqrt", false },
 924  			{ B01011100, PS|SS|PD|SD, "sub", false },
 925  			{ B00010101, PS|PD      , "unpckh", false },
 926  			{ B00010100, PS|PD      , "unpckl", false },
 927  			{ B01010111, PS|PD      , "xor", false },
 928  			//
 929  		};
 930  		for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl1); i++) {
 931  			const XmmTbl1 *p = &xmmTbl1[i];
 932  			for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
 933  				if (!(p->mode & (1 << j))) continue;
 934  				char buf[16];
 935  				snprintf(buf, sizeof(buf), "%s%s", p->name, sufTbl[j].name);
 936  				if (p->hasImm) {
 937  					put(buf, XMM, XMM|MEM, IMM);
 938  				} else {
 939  					put(buf, XMM, XMM|MEM);
 940  				}
 941  			}
 942  		}
 943  	}
 944  	void putXMM2() const
 945  	{
 946  		// (XMM, XMM|MEM)
 947  		static const char tbl[][16] = {
 948  			"punpckhqdq",
 949  			"punpcklqdq",
 950  
 951  			"comiss",
 952  			"ucomiss",
 953  			"comisd",
 954  			"ucomisd",
 955  
 956  			"cvtpd2ps",
 957  			"cvtps2pd",
 958  			"cvtsd2ss",
 959  			"cvtss2sd",
 960  			"cvtpd2dq",
 961  			"cvttpd2dq",
 962  			"cvtdq2pd",
 963  			"cvtps2dq",
 964  			"cvttps2dq",
 965  			"cvtdq2ps",
 966  
 967  			"addsubpd",
 968  			"addsubps",
 969  			"haddpd",
 970  			"haddps",
 971  			"hsubpd",
 972  			"hsubps",
 973  			"movddup",
 974  			"movshdup",
 975  			"movsldup",
 976  		};
 977  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 978  			put(tbl[i], XMM, XMM|MEM);
 979  		}
 980  	}
 981  	void putXMM3() const
 982  	{
 983  		static const struct Tbl {
 984  			const char *name;
 985  			uint64_t op1;
 986  			uint64_t op2;
 987  		} tbl[] = {
 988  			{ "cvtpi2ps", XMM, MMX|MEM },
 989  			{ "cvtps2pi", MMX, XMM|MEM },
 990  			{ "cvtsi2ss", XMM, REG32|MEM },
 991  			{ "cvtss2si", REG32, XMM|MEM },
 992  			{ "cvttps2pi", MMX, XMM|MEM },
 993  			{ "cvttss2si", REG32, XMM|MEM },
 994  			{ "cvtpi2pd", XMM, MMX|MEM },
 995  			{ "cvtpd2pi", MMX, XMM|MEM },
 996  			{ "cvtsi2sd", XMM, REG32|MEM },
 997  			{ "cvtsd2si", REG32, XMM|MEM },
 998  			{ "cvttpd2pi", MMX, XMM|MEM },
 999  			{ "cvttsd2si", REG32, XMM|MEM },
1000  		};
1001  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1002  			const Tbl *p = &tbl[i];
1003  			put(p->name, p->op1, p->op2);
1004  		}
1005  	}
1006  	void putXMM4() const
1007  	{
1008  		static const char tbl[][16] = {
1009  			"movhps",
1010  			"movlps",
1011  			"movhpd",
1012  			"movlpd",
1013  		};
1014  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1015  			const char *p = tbl[i];
1016  			put(p, XMM, MEM);
1017  			put(p, MEM, XMM);
1018  		}
1019  	}
1020  	void putCmov() const
1021  	{
1022  		const char tbl[][4] = {
1023  			"o",
1024  			"no",
1025  			"b",
1026  			"c",
1027  			"nae",
1028  			"nb",
1029  			"nc",
1030  			"ae",
1031  			"e",
1032  			"z",
1033  			"ne",
1034  			"nz",
1035  			"be",
1036  			"na",
1037  			"nbe",
1038  			"a",
1039  			"s",
1040  			"ns",
1041  			"p",
1042  			"pe",
1043  			"np",
1044  			"po",
1045  			"l",
1046  			"nge",
1047  			"nl",
1048  			"ge",
1049  			"le",
1050  			"ng",
1051  			"nle",
1052  			"g",
1053  		};
1054  #if defined(__GNUC__) && !defined(__clang__)
1055  	#pragma GCC diagnostic push
1056  	#pragma GCC diagnostic ignored "-Wformat-truncation" // wrong detection
1057  #endif
1058  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1059  			char buf[32];
1060  			snprintf(buf, sizeof(buf), "cmov%s", tbl[i]);
1061  			put(buf, REG16, REG16|MEM);
1062  			put(buf, REG32, REG32|MEM);
1063  			put(buf, REG64, REG64|MEM);
1064  			snprintf(buf, sizeof(buf), "set%s", tbl[i]);
1065  			put(buf, REG8|REG8_3|MEM);
1066  		}
1067  #if defined(__GNUC__) && !defined(__clang__)
1068  	#pragma GCC diagnostic pop
1069  #endif
1070  	}
1071  	void putReg1() const
1072  	{
1073  		// (REG, REG|MEM)
1074  		{
1075  			static const char tbl[][16] = {
1076  				"adc",
1077  				"add",
1078  				"and_",
1079  				"cmp",
1080  				"or_",
1081  				"sbb",
1082  				"sub",
1083  				"xor_",
1084  			};
1085  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1086  				const std::string s = removeUnderScore(tbl[i]);
1087  				const char *p = s.c_str();
1088  				put(p, REG32, REG32|MEM);
1089  				put(p, REG64, REG64|MEM);
1090  				put(p, REG16, REG16|MEM);
1091  				put(p, REG8|REG8_3, REG8|MEM);
1092  				put(p, MEM, REG32e|REG16|REG8|REG8_3);
1093  
1094  				put(p, MEM8, IMM8|NEG8);
1095  				put(p, MEM16, IMM8|IMM16|NEG8|NEG16);
1096  				put(p, MEM32, IMM8|IMM32|NEG8|NEG32);
1097  
1098  				put(p, REG64|RAX, IMM8|NEG8);
1099  				put(p, REG64|RAX, "0x12345678", "0x12345678");
1100  				put(p, REG64|RAX, "192", "192");
1101  				put(p, REG64|RAX, "0x1234", "0x1234");
1102  				put(p, REG32|EAX, IMM8|IMM32|NEG8);
1103  				put(p, REG16|AX, IMM8|IMM16|NEG8|NEG16);
1104  				put(p, REG8|REG8_3|AL, IMM|NEG8);
1105  			}
1106  		}
1107  		{
1108  			const char tbl[][8] = {
1109  				"adcx",
1110  				"adox",
1111  			};
1112  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1113  				const char *p = tbl[i];
1114  				put(p, REG32, REG32|MEM);
1115  				put(p, REG64, REG64|MEM);
1116  			}
1117  		}
1118  	}
1119  	void putBt() const
1120  	{
1121  		static const char tbl[][16] = {
1122  			"bt",
1123  			"bts",
1124  			"btr",
1125  			"btc",
1126  		};
1127  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1128  			const char *p = tbl[i];
1129  			put(p, MEM|REG16, REG16);
1130  			put(p, MEM|REG32, REG32);
1131  			put(p, MEM|REG64, REG64);
1132  			put(p, MEM16|REG16, IMM);
1133  		}
1134  	}
1135  	void putRorM() const
1136  	{
1137  		static const char tbl[][16] = {
1138  			"inc",
1139  			"dec",
1140  			"div",
1141  			"idiv",
1142  			"imul",
1143  			"mul",
1144  			"neg",
1145  			"not_",
1146  		};
1147  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1148  			const std::string s = removeUnderScore(tbl[i]);
1149  			const char *p = s.c_str();
1150  			put(p, REG32e|REG16|REG8|REG8_3);
1151  			put(p, MEM32|MEM16|MEM8);
1152  		}
1153  		const char *p = "imul";
1154  		put(p, REG16, REG16|MEM16);
1155  		put(p, REG32, REG32|MEM32);
1156  		put(p, REG64, REG64|MEM);
1157  		put(p, REG16, REG16|MEM, IMM8|IMM16);
1158  		put(p, REG32, REG32|MEM, IMM8|IMM32);
1159  		put(p, REG64, REG64|MEM, IMM8|IMM32);
1160  	}
1161  	void putPushPop() const
1162  	{
1163  		/*
1164  			QQQ:
1165  			push byte 2
1166  			push dword 2
1167  			reduce 4-byte stack
1168  			push word 2
1169  			reduce 2-byte stack, so I can't support it
1170  		*/
1171  
1172  		put("push", IMM8|IMM32);
1173  		if (isXbyak_) {
1174  			puts("push(word, 1000);dump();");
1175  		} else {
1176  			puts("push word 1000");
1177  		}
1178  
1179  		put("push", REG16|MEM16);
1180  		put("pop", REG16|MEM16);
1181  #ifdef XBYAK64
1182  		put("push", REG64|IMM32|MEM64);
1183  		put("pop", REG64|MEM64);
1184  #else
1185  		put("push", REG32|IMM32|MEM32);
1186  		put("pop", REG32|MEM32);
1187  #endif
1188  	}
1189  	void putPushPop8_16() const
1190  	{
1191  		const struct {
1192  			int b;
1193  			uint32_t v;
1194  		} tbl[] = {
1195  			{ 8, 0x7f },
1196  			{ 8, 0x80 },
1197  			{ 8, 0xff },
1198  			{ 8, 0x100 },
1199  			{ 8, 0x12345 },
1200  			{ 16, 0x7fff },
1201  			{ 16, 0x8000 },
1202  			{ 16, 0xffff },
1203  			{ 16, 0x10000 },
1204  			{ 16, 0x12345 },
1205  		};
1206  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1207  			const char *b = tbl[i].b == 8 ? "byte" : "word";
1208  			uint32_t v = tbl[i].v;
1209  			if (isXbyak_) {
1210  				printf("push(%s, 0x%x);dump();\n", b, v);
1211  			} else {
1212  				printf("push %s 0x%x\n", b, v);
1213  			}
1214  		}
1215  	}
1216  	void putTest() const
1217  	{
1218  		const char *p = "test";
1219  		put(p, REG32|MEM, REG32);
1220  		put(p, REG64|MEM, REG64);
1221  		put(p, REG16|MEM, REG16);
1222  		put(p, REG8|REG8_3|MEM, REG8|REG8_3);
1223  		put(p, REG32e|REG16|REG8|REG8_3|EAX|AX|AL|MEM32|MEM16|MEM8, IMM);
1224  	}
1225  	void putMov64() const
1226  	{
1227  		const struct {
1228  			const char *a;
1229  			const char *b;
1230  		} tbl[] = {
1231  			{ "0", "0" },
1232  			{ "0x123", "0x123" },
1233  			{ "0x12345678", "0x12345678" },
1234  			{ "0x7fffffff", "0x7fffffff" },
1235  			{ "0xffffffff", "0xffffffff" },
1236  			{ "0x80000000", "0x80000000" },
1237  			{ "2147483648U", "2147483648" },
1238  			{ "0x80000001", "0x80000001" },
1239  			{ "0xffffffffffffffff", "0xffffffffffffffff" },
1240  			{ "-1", "-1" },
1241  			{ "0xffffffff80000000", "0xffffffff80000000" },
1242  			{ "0xffffffff80000001", "0xffffffff80000001" },
1243  			{ "0xffffffff12345678", "0xffffffff12345678" },
1244  		};
1245  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1246  			put("mov", REG64, tbl[i].a, tbl[i].b);
1247  		}
1248  	}
1249  	void putLoadSeg() const
1250  	{
1251  		const struct Tbl {
1252  			const char *name;
1253  			bool support64Bit;
1254  		} tbl[] = {
1255  #ifdef XBYAK32
1256  			{ "lds", false },
1257  			{ "les", false },
1258  #endif
1259  			{ "lss", true },
1260  			{ "lfs", true },
1261  			{ "lgs", true },
1262  		};
1263  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1264  			const Tbl *p = &tbl[i];
1265  			put(p->name, REG16|REG32, MEM);
1266  #ifdef XBYAK64
1267  			if (p->support64Bit) {
1268  				put(p->name, REG64, MEM);
1269  			}
1270  #endif
1271  		}
1272  	}
1273  	// only nasm
1274  	void putMovImm64() const
1275  	{
1276  		put("mov", REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
1277  		put("mov", REG64, "0x12345678", "0x12345678");
1278  		put("mov", REG64, "0xffffffff12345678LL", "0xffffffff12345678");
1279  		put("mov", REG32e|REG16|REG8|RAX|EAX|AX|AL, IMM);
1280  
1281  		put("mov", EAX, "ptr[(void*)-1]", "[-1]");
1282  		put("mov", EAX, "ptr[(void*)0x7fffffff]", "[0x7fffffff]");
1283  		put("mov", EAX, "ptr[(void*)0xffffffffffffffff]", "[0xffffffffffffffff]");
1284  	}
1285  	void putEtc() const
1286  	{
1287  		{
1288  			const char *p = "ret";
1289  			put(p);
1290  			put(p, IMM);
1291  			p = "retf";
1292  			put(p);
1293  			put(p, IMM);
1294  			p = "mov";
1295  			put(p, EAX|REG32|MEM|MEM_ONLY_DISP, REG32|EAX);
1296  			put(p, REG64|MEM|MEM_ONLY_DISP, REG64|RAX);
1297  			put(p, AX|REG16|MEM|MEM_ONLY_DISP, REG16|AX);
1298  			put(p, AL|REG8|REG8_3|MEM|MEM_ONLY_DISP, REG8|REG8_3|AL);
1299  			put(p, REG32e|REG16|REG8|RAX|EAX|AX|AL, MEM|MEM_ONLY_DISP);
1300  			put(p, MEM32|MEM16|MEM8, IMM);
1301  			put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
1302  			put("movbe", REG16|REG32e, MEM);
1303  			put("movbe", MEM, REG16|REG32e);
1304  #if defined(XBYAK64) && !defined(__ILP32__)
1305  			put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
1306  			put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
1307  			put(p, "qword [rax], 0");
1308  			put(p, "qword [rax], 0x12");
1309  			put(p, "qword [rax], 0x1234");
1310  			put(p, "qword [rax], 0x12345678");
1311  //			put(p, "qword [rax], 0x123456789ab");
1312  			put(p, "qword [rax], 1000000");
1313  			put(p, "rdx, qword [rax]");
1314  #endif
1315  			put("mov", EAX, "ptr [eax + ecx * 0]", "[eax + ecx * 0]"); // ignore scale = 0
1316  		}
1317  		{
1318  			const char tbl[][8] = {
1319  				"movsx",
1320  				"movzx",
1321  			};
1322  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1323  				const char *p = tbl[i];
1324  				put(p, REG64, REG16|REG8|MEM8|MEM16);
1325  				put(p, REG32, REG16|REG8|MEM8|MEM16);
1326  				put(p, REG16, REG8|MEM8);
1327  				put(p, "eax, ah");
1328  			}
1329  		}
1330  #ifdef XBYAK64
1331  		put("movsxd", REG64, REG32|MEM32);
1332  #endif
1333  		put("cmpxchg8b", MEM);
1334  #ifdef XBYAK64
1335  		put("cmpxchg16b", MEM);
1336  		put("fxrstor64", MEM);
1337  		put("xbegin", "0x12345678");
1338  #endif
1339  		{
1340  			const char tbl[][8] = {
1341  				"xadd",
1342  				"cmpxchg"
1343  			};
1344  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1345  				const char *p = tbl[i];
1346  				put(p, REG8|MEM, REG8);
1347  				put(p, REG16|MEM, REG16);
1348  				put(p, REG32|MEM, REG32);
1349  				put(p, REG64|MEM, REG64);
1350  			}
1351  		}
1352  
1353  		put("xchg", AL|REG8, AL|REG8|MEM);
1354  		put("xchg", MEM, AL|REG8);
1355  		put("xchg", AX|REG16, AX|REG16|MEM);
1356  		put("xchg", MEM, AX|REG16);
1357  		put("xchg", EAX|REG32, EAX|REG32|MEM);
1358  		put("xchg", MEM, EAX|REG32);
1359  		put("xchg", REG64, REG64|MEM);
1360  		put("xabort", IMM8);
1361  	}
1362  	void putShift() const
1363  	{
1364  		const char tbl[][8] = {
1365  			"rcl",
1366  			"rcr",
1367  			"rol",
1368  			"ror",
1369  			"sar",
1370  			"shl",
1371  			"shr",
1372  
1373  			"sal",
1374  		};
1375  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1376  			const char *p = tbl[i];
1377  			put(p, REG32e|REG16|REG8|MEM32|MEM16|MEM8, ONE|CL|IMM);
1378  		}
1379  	}
1380  	void putShxd() const
1381  	{
1382  		const char tbl[][8] = {
1383  			"shld",
1384  			"shrd",
1385  		};
1386  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1387  			const char *p = tbl[i];
1388  			put(p, REG64|MEM, REG64, IMM|CL);
1389  			put(p, REG32|MEM, REG32, IMM|CL);
1390  			put(p, REG16|MEM, REG16, IMM|CL);
1391  		}
1392  	}
1393  	void putBs() const
1394  	{
1395  		const char tbl[][8] = {
1396  			"bsr",
1397  			"bsf",
1398  			"lzcnt",
1399  			"tzcnt",
1400  			"popcnt",
1401  		};
1402  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1403  			const char *p = tbl[i];
1404  			put(p, REG64, REG64|MEM);
1405  			put(p, REG32, REG32|MEM);
1406  			put(p, REG16, REG16|MEM);
1407  		}
1408  	}
1409  	void putSSSE3() const
1410  	{
1411  		const char tbl[][16] = {
1412  			"pshufb",
1413  			"phaddw",
1414  			"phaddd",
1415  			"phaddsw",
1416  			"pmaddubsw",
1417  			"phsubw",
1418  			"phsubd",
1419  			"phsubsw",
1420  			"psignb",
1421  			"psignw",
1422  			"psignd",
1423  			"pmulhrsw",
1424  			"pabsb",
1425  			"pabsw",
1426  			"pabsd",
1427  		};
1428  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1429  			const char *p = tbl[i];
1430  			put(p, XMM, XMM|MEM);
1431  			put(p, MMX, MMX|MEM);
1432  		}
1433  		put("palignr", XMM, XMM|MEM, IMM8);
1434  		put("palignr", MMX, MMX|MEM, IMM8);
1435  	}
1436  	void putSSE4_1() const
1437  	{
1438  		const char tbl[][16] = {
1439  			"blendvpd",
1440  			"blendvps",
1441  			"packusdw",
1442  			"pblendvb",
1443  			"pcmpeqq",
1444  			"ptest",
1445  			"pmovsxbw",
1446  			"pmovsxbd",
1447  			"pmovsxbq",
1448  			"pmovsxwd",
1449  			"pmovsxwq",
1450  			"pmovsxdq",
1451  			"pmovzxbw",
1452  			"pmovzxbd",
1453  			"pmovzxbq",
1454  			"pmovzxwd",
1455  			"pmovzxwq",
1456  			"pmovzxdq",
1457  			"pminsb",
1458  			"pminsd",
1459  			"pminuw",
1460  			"pminud",
1461  			"pmaxsb",
1462  			"pmaxsd",
1463  			"pmaxuw",
1464  			"pmaxud",
1465  			"pmuldq",
1466  			"pmulld",
1467  			"phminposuw",
1468  			"pcmpgtq",
1469  			"aesdec",
1470  			"aesdeclast",
1471  			"aesenc",
1472  			"aesenclast",
1473  			"aesimc",
1474  		};
1475  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1476  			const char *p = tbl[i];
1477  			put(p, XMM, XMM|MEM);
1478  		}
1479  	}
1480  	void putSSE4_2() const
1481  	{
1482  		{
1483  			const char tbl[][16] = {
1484  				"blendpd",
1485  				"blendps",
1486  				"dppd",
1487  				"dpps",
1488  				"mpsadbw",
1489  				"pblendw",
1490  				"roundps",
1491  				"roundpd",
1492  				"roundss",
1493  				"roundsd",
1494  				"pcmpestrm",
1495  				"pcmpestri",
1496  				"pcmpistrm",
1497  				"pcmpistri",
1498  				"pclmulqdq",
1499  				"aeskeygenassist",
1500  			};
1501  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1502  				const char *p = tbl[i];
1503  				put(p, XMM, XMM|MEM, IMM);
1504  			}
1505  		}
1506  		put("extractps", REG32e|MEM, XMM, IMM);
1507  		put("pextrw", REG32e|MEM, XMM, IMM); // pextrw for REG32 is for MMX2
1508  		put("pextrb", REG32e|MEM, XMM, IMM);
1509  		put("pextrd", REG32|MEM, XMM, IMM);
1510  
1511  		put("insertps", XMM, XMM|MEM, IMM);
1512  		put("pinsrb", XMM, REG32|MEM, IMM);
1513  		put("pinsrd", XMM, REG32|MEM, IMM);
1514  		put("movntdqa", XMM, MEM);
1515  		put("crc32", REG32, REG8|REG16|REG32|MEM8|MEM16|MEM32);
1516  		put("crc32", REG64, REG64|REG8|MEM8);
1517  #ifdef XBYAK64
1518  		put("pextrq", REG64|MEM, XMM, IMM);
1519  		put("pinsrq", XMM, REG64|MEM, IMM);
1520  #endif
1521  
1522  	}
1523  	void putVpclmulqdq()
1524  	{
1525  		const char tbl[][16] = {
1526  			"vpclmullqlqdq",
1527  			"vpclmulhqlqdq",
1528  			"vpclmullqhqdq",
1529  			"vpclmulhqhqdq",
1530  		};
1531  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1532  			const char *p = tbl[i] + 1; // remove the top 'v'
1533  			put(p, XMM, XMM|MEM);
1534  			p = tbl[i]; // use the top 'v'
1535  			put(p, XMM, XMM, XMM|MEM);
1536  			put(p, YMM, YMM, YMM|MEM);
1537  			put(p, ZMM, ZMM, ZMM|MEM);
1538  		}
1539  	}
1540  	void putSHA() const
1541  	{
1542  		put("sha1rnds4", XMM, XMM|MEM, IMM);
1543  		put("sha1nexte", XMM, XMM|MEM);
1544  		put("sha1msg1", XMM, XMM|MEM);
1545  		put("sha1msg2", XMM, XMM|MEM);
1546  		put("sha256rnds2", XMM, XMM|MEM);
1547  		put("sha256msg1", XMM, XMM|MEM);
1548  		put("sha256msg2", XMM, XMM|MEM);
1549  	}
1550  	void putMPX() const
1551  	{
1552  #ifdef XBYAK64
1553  		const uint64_t reg = REG64;
1554  #else
1555  		const uint64_t reg = REG32;
1556  #endif
1557  		put("bndcl", BNDREG, reg|MEM);
1558  		put("bndcu", BNDREG, reg|MEM);
1559  		put("bndcn", BNDREG, reg|MEM);
1560  		put("bndldx", BNDREG, MEM);
1561  		put("bndmk", BNDREG, MEM);
1562  		put("bndmov", BNDREG, BNDREG|MEM);
1563  		put("bndstx", MEM, BNDREG);
1564  		put("bndstx", "ptr [eax]", "[eax]", BNDREG);
1565  		put("bndstx", "ptr [eax+5]", "[eax+5]", BNDREG);
1566  		put("bndstx", "ptr [eax+500]", "[eax+500]", BNDREG);
1567  		put("bndstx", "ptr [eax+ecx]", "[eax+ecx]", BNDREG);
1568  		put("bndstx", "ptr [ecx+eax]", "[ecx+eax]", BNDREG);
1569  		put("bndstx", "ptr [eax+esp]", "[eax+esp]", BNDREG);
1570  		put("bndstx", "ptr [esp+eax]", "[esp+eax]", BNDREG);
1571  		put("bndstx", "ptr [eax+ecx*2]", "[eax+ecx*2]", BNDREG);
1572  		put("bndstx", "ptr [ecx+ecx]", "[ecx+ecx]", BNDREG);
1573  		put("bndstx", "ptr [ecx*2]", "[ecx*2]", BNDREG);
1574  		put("bndstx", "ptr [eax+ecx*2+500]", "[eax+ecx*2+500]", BNDREG);
1575  #ifdef XBYAK64
1576  		put("bndstx", "ptr [rax+rcx*2]", "[rax+rcx*2]", BNDREG);
1577  		put("bndstx", "ptr [r9*2]", "[r9*2]", BNDREG);
1578  		put("bndstx", "ptr [r9*2+r15]", "[r9*2+r15]", BNDREG);
1579  #endif
1580  	}
1581  	void putFpuMem16_32() const
1582  	{
1583  		const char tbl[][8] = {
1584  			"fiadd",
1585  			"fidiv",
1586  			"fidivr",
1587  			"ficom",
1588  			"ficomp",
1589  			"fimul",
1590  			"fist",
1591  			"fisub",
1592  			"fisubr",
1593  		};
1594  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1595  			const char *p = tbl[i];
1596  			put(p, MEM16|MEM32);
1597  		}
1598  	}
1599  	void putFpuMem32_64() const
1600  	{
1601  		const char tbl[][8] = {
1602  			"fadd",
1603  			"fcom",
1604  			"fcomp",
1605  			"fdiv",
1606  			"fdivr",
1607  			"fld",
1608  			"fmul",
1609  			"fst",
1610  			"fstp",
1611  			"fsub",
1612  			"fsubr",
1613  		};
1614  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1615  			const char *p = tbl[i];
1616  			put(p, MEM32|MEM64);
1617  		}
1618  	}
1619  	void putFpuMem16_32_64() const
1620  	{
1621  		const char tbl[][8] = {
1622  			"fild",
1623  			"fistp",
1624  			"fisttp",
1625  		};
1626  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1627  			const char *p = tbl[i];
1628  			put(p, MEM16|MEM32|MEM64);
1629  		}
1630  	}
1631  	void putFpuFpu() const
1632  	{
1633  		const struct Tbl {
1634  			const char *name;
1635  			int mode; /* 1:only (st0, sti), 2: only (sti, st0), 3: both */
1636  		} tbl[] = {
1637  			{ "fadd", 3 },
1638  			{ "faddp", 2 },
1639  			{ "fcmovb", 1 },
1640  			{ "fcmove", 1 },
1641  			{ "fcmovbe", 1 },
1642  			{ "fcmovu", 1 },
1643  			{ "fcmovnb", 1 },
1644  			{ "fcmovne", 1 },
1645  			{ "fcmovnbe", 1 },
1646  			{ "fcmovnu", 1 },
1647  			{ "fcomi", 1 },
1648  			{ "fcomip", 1 },
1649  			{ "fucomi", 1 },
1650  			{ "fucomip", 1 },
1651  			{ "fdiv", 3 },
1652  			{ "fdivp", 2 },
1653  			{ "fdivr", 3 },
1654  			{ "fdivrp", 2 },
1655  			{ "fmul", 3 },
1656  			{ "fmulp", 2 },
1657  			{ "fsub", 3 },
1658  			{ "fsubp", 2 },
1659  			{ "fsubr", 3 },
1660  			{ "fsubrp", 2 },
1661  		};
1662  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1663  			const Tbl *p = &tbl[i];
1664  			if (p->mode & 1) put(p->name, ST0, STi);
1665  			if (p->mode & 2) put(p->name, STi, ST0);
1666  			if (p->mode) put(p->name, STi);
1667  		}
1668  	}
1669  	void putFpu() const
1670  	{
1671  		const char tbl[][16] = {
1672  			"fcom",
1673  			"fcomp",
1674  			"ffree",
1675  			"fld",
1676  			"fst",
1677  			"fstp",
1678  			"fucom",
1679  			"fucomp",
1680  			"fxch",
1681  		};
1682  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1683  			put(tbl[i], STi);
1684  		}
1685  	}
1686  	void putAVX1()
1687  	{
1688  		const struct Tbl {
1689  			const char *name;
1690  			bool only_pd_ps;
1691  		} tbl[] = {
1692  			{ "add", false },
1693  			{ "sub", false },
1694  			{ "mul", false },
1695  			{ "div", false },
1696  			{ "max", false },
1697  			{ "min", false },
1698  			{ "and", true },
1699  			{ "andn", true },
1700  			{ "or", true },
1701  			{ "xor", true },
1702  
1703  			{ "addsub", true },
1704  			{ "hadd", true },
1705  			{ "hsub", true },
1706  		};
1707  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1708  			const struct Suf {
1709  				const char *suf;
1710  				bool supportYMM;
1711  			} suf[] = {
1712  				{ "pd", true },
1713  				{ "ps", true },
1714  				{ "sd", false },
1715  				{ "ss", false },
1716  			};
1717  			for (size_t j = 0; j < NUM_OF_ARRAY(suf); j++) {
1718  				if (tbl[i].only_pd_ps && j == 2) break;
1719  				std::string name = std::string("v") + tbl[i].name + suf[j].suf;
1720  				const char *p = name.c_str();
1721  				put(p, XMM, XMM | MEM);
1722  				put(p, XMM, XMM, XMM | MEM);
1723  				if (!suf[j].supportYMM) continue;
1724  				put(p, YMM, YMM | MEM);
1725  				put(p, YMM, YMM, YMM | MEM);
1726  			}
1727  		}
1728  	}
1729  	void putAVX_X_X_XM_omit()
1730  	{
1731  		const struct Tbl {
1732  			const char *name;
1733  			bool supportYMM;
1734  		} tbl[] = {
1735  			{ "vaesenc", false },
1736  			{ "vaesenclast", false },
1737  			{ "vaesdec", false },
1738  			{ "vaesdeclast", false },
1739  			{ "vcvtsd2ss", false },
1740  			{ "vcvtss2sd", false },
1741  			{ "vpacksswb", true },
1742  			{ "vpackssdw", true },
1743  			{ "vpackuswb", true },
1744  			{ "vpackusdw", true },
1745  
1746  			{ "vpaddb", true },
1747  			{ "vpaddw", true },
1748  			{ "vpaddd", true },
1749  			{ "vpaddq", true },
1750  
1751  			{ "vpaddsb", true },
1752  			{ "vpaddsw", true },
1753  
1754  			{ "vpaddusb", true },
1755  			{ "vpaddusw", true },
1756  
1757  			{ "vpand", true },
1758  			{ "vpandn", true },
1759  			{ "vpavgb", true },
1760  			{ "vpavgw", true },
1761  
1762  			{ "vpcmpeqb", true },
1763  			{ "vpcmpeqw", true },
1764  			{ "vpcmpeqd", true },
1765  			{ "vpcmpeqq", true },
1766  
1767  			{ "vpcmpgtb", true },
1768  			{ "vpcmpgtw", true },
1769  			{ "vpcmpgtd", true },
1770  			{ "vpcmpgtq", true },
1771  
1772  			{ "vphaddw", true },
1773  			{ "vphaddd", true },
1774  			{ "vphaddsw", true },
1775  
1776  			{ "vphsubw", true },
1777  			{ "vphsubd", true },
1778  			{ "vphsubsw", true },
1779  			{ "vpmaddwd", true },
1780  			{ "vpmaddubsw", true },
1781  
1782  			{ "vpmaxsb", true },
1783  			{ "vpmaxsw", true },
1784  			{ "vpmaxsd", true },
1785  
1786  			{ "vpmaxub", true },
1787  			{ "vpmaxuw", true },
1788  			{ "vpmaxud", true },
1789  
1790  			{ "vpminsb", true },
1791  			{ "vpminsw", true },
1792  			{ "vpminsd", true },
1793  
1794  			{ "vpminub", true },
1795  			{ "vpminuw", true },
1796  			{ "vpminud", true },
1797  
1798  			{ "vpmulhuw", true },
1799  			{ "vpmulhrsw", true },
1800  			{ "vpmulhw", true },
1801  			{ "vpmullw", true },
1802  			{ "vpmulld", true },
1803  
1804  			{ "vpmuludq", true },
1805  			{ "vpmuldq", true },
1806  
1807  			{ "vpor", true },
1808  			{ "vpsadbw", true },
1809  
1810  			{ "vpsignb", true },
1811  			{ "vpsignw", true },
1812  			{ "vpsignd", true },
1813  
1814  			{ "vpsllw", false },
1815  			{ "vpslld", false },
1816  			{ "vpsllq", false },
1817  
1818  			{ "vpsraw", false },
1819  			{ "vpsrad", false },
1820  			{ "vpsrlw", false },
1821  			{ "vpsrld", false },
1822  			{ "vpsrlq", false },
1823  
1824  			{ "vpsubb", true },
1825  			{ "vpsubw", true },
1826  			{ "vpsubd", true },
1827  			{ "vpsubq", true },
1828  
1829  			{ "vpsubsb", true },
1830  			{ "vpsubsw", true },
1831  
1832  			{ "vpsubusb", true },
1833  			{ "vpsubusw", true },
1834  
1835  			{ "vpunpckhbw", true },
1836  			{ "vpunpckhwd", true },
1837  			{ "vpunpckhdq", true },
1838  			{ "vpunpckhqdq", true },
1839  
1840  			{ "vpunpcklbw", true },
1841  			{ "vpunpcklwd", true },
1842  			{ "vpunpckldq", true },
1843  			{ "vpunpcklqdq", true },
1844  
1845  			{ "vpxor", true },
1846  			{ "vsqrtsd", false },
1847  			{ "vsqrtss", false },
1848  
1849  			{ "vunpckhpd", true },
1850  			{ "vunpckhps", true },
1851  			{ "vunpcklpd", true },
1852  			{ "vunpcklps", true },
1853  		};
1854  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1855  			const Tbl *p = &tbl[i];
1856  			put(p->name, XMM, XMM | MEM);
1857  			put(p->name, XMM, XMM, XMM | MEM);
1858  			if (!p->supportYMM) continue;
1859  			put(p->name, YMM, YMM | MEM);
1860  			put(p->name, YMM, YMM, YMM | MEM);
1861  		}
1862  	}
1863  	void putAVX_X_X_XM_IMM()
1864  	{
1865  		const struct Tbl {
1866  			const char *name;
1867  			bool supportYMM;
1868  		} tbl[] = {
1869  			{ "vblendpd", true },
1870  			{ "vblendps", true },
1871  			{ "vdppd", false },
1872  			{ "vdpps", true },
1873  			{ "vmpsadbw", true },
1874  			{ "vpblendw", true },
1875  			{ "vpblendd", true },
1876  			{ "vroundsd", false },
1877  			{ "vroundss", false },
1878  			{ "vpclmulqdq", false },
1879  			{ "vcmppd", true },
1880  			{ "vcmpps", true },
1881  			{ "vcmpsd", false },
1882  			{ "vcmpss", false },
1883  			{ "vinsertps", false },
1884  			{ "vpalignr", true },
1885  			{ "vshufpd", true },
1886  			{ "vshufps", true },
1887  		};
1888  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1889  			const Tbl *p = &tbl[i];
1890  			put(p->name, XMM, XMM, XMM | MEM, IMM);
1891  			put(p->name, XMM, XMM | MEM, IMM);
1892  			if (!p->supportYMM) continue;
1893  			put(p->name, YMM, YMM, YMM | MEM, IMM);
1894  			put(p->name, YMM, YMM | MEM, IMM);
1895  		}
1896  	}
1897  	void putAVX_X_XM_IMM()
1898  	{
1899  		const struct Tbl {
1900  			const char *name;
1901  			bool supportYMM;
1902  		} tbl[] = {
1903  			{ "vroundpd", true },
1904  			{ "vroundps", true },
1905  			{ "vpcmpestri", false },
1906  			{ "vpcmpestrm", false },
1907  			{ "vpcmpistri", false },
1908  			{ "vpcmpistrm", false },
1909  			{ "vpermilpd", true },
1910  			{ "vpermilps", true },
1911  			{ "vaeskeygenassist", false },
1912  			{ "vpshufd", true },
1913  			{ "vpshufhw", true },
1914  			{ "vpshuflw", true },
1915  		};
1916  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1917  			const Tbl *p = &tbl[i];
1918  			put(p->name, XMM, XMM | MEM, IMM);
1919  			if (!p->supportYMM) continue;
1920  			put(p->name, YMM, YMM | MEM, IMM);
1921  		}
1922  	}
1923  	void putAVX_X_X_XM()
1924  	{
1925  		const struct Tbl {
1926  			const char *name;
1927  			bool supportYMM;
1928  		} tbl[] = {
1929  			{ "vpermilpd", true },
1930  			{ "vpermilps", true },
1931  			{ "vpshufb", true },
1932  
1933  			{ "vpsllvd", true },
1934  			{ "vpsllvq", true },
1935  			{ "vpsravd", true },
1936  			{ "vpsrlvd", true },
1937  			{ "vpsrlvq", true },
1938  		};
1939  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1940  			const Tbl *p = &tbl[i];
1941  			put(p->name, XMM, XMM, XMM | MEM);
1942  			if (!p->supportYMM) continue;
1943  			put(p->name, YMM, YMM, YMM | MEM);
1944  		}
1945  	}
1946  	void putAVX_X_XM()
1947  	{
1948  		const struct Tbl {
1949  			const char *name;
1950  			bool supportYMM;
1951  		} tbl[] = {
1952  			{ "vaesimc", false },
1953  			{ "vtestps", true },
1954  			{ "vtestpd", true },
1955  			{ "vcomisd", false },
1956  			{ "vcomiss", false },
1957  			{ "vcvtdq2ps", true },
1958  			{ "vcvtps2dq", true },
1959  			{ "vcvttps2dq", true },
1960  			{ "vmovapd", true },
1961  			{ "vmovaps", true },
1962  			{ "vmovddup", true },
1963  			{ "vmovdqa", true },
1964  			{ "vmovdqu", true },
1965  			{ "vmovupd", true },
1966  			{ "vmovups", true },
1967  
1968  			{ "vpabsb", true },
1969  			{ "vpabsw", true },
1970  			{ "vpabsd", true },
1971  			{ "vphminposuw", false },
1972  
1973  			{ "vpmovsxbw", false },
1974  			{ "vpmovsxbd", false },
1975  			{ "vpmovsxbq", false },
1976  			{ "vpmovsxwd", false },
1977  			{ "vpmovsxwq", false },
1978  			{ "vpmovsxdq", false },
1979  
1980  			{ "vpmovzxbw", false },
1981  			{ "vpmovzxbd", false },
1982  			{ "vpmovzxbq", false },
1983  			{ "vpmovzxwd", false },
1984  			{ "vpmovzxwq", false },
1985  			{ "vpmovzxdq", false },
1986  
1987  			{ "vptest", true },
1988  			{ "vrcpps", true },
1989  			{ "vrcpss", false },
1990  
1991  			{ "vrsqrtps", true },
1992  			{ "vrsqrtss", false },
1993  
1994  			{ "vsqrtpd", true },
1995  			{ "vsqrtps", true },
1996  			{ "vucomisd", false },
1997  			{ "vucomiss", false },
1998  		};
1999  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2000  			const Tbl *p = &tbl[i];
2001  			put(p->name, XMM, XMM | MEM);
2002  			if (!p->supportYMM) continue;
2003  			put(p->name, YMM, YMM | MEM);
2004  		}
2005  	}
2006  	void putAVX_Y_XM()
2007  	{
2008  		const char *tbl[] = {
2009  			"vpmovsxbw",
2010  			"vpmovsxbd",
2011  			"vpmovsxbq",
2012  			"vpmovsxwd",
2013  			"vpmovsxwq",
2014  			"vpmovsxdq",
2015  			"vpmovzxbw",
2016  			"vpmovzxbd",
2017  			"vpmovzxbq",
2018  			"vpmovzxwd",
2019  			"vpmovzxwq",
2020  			"vpmovzxdq",
2021  		};
2022  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2023  			const char *name = tbl[i];
2024  			put(name, YMM, XMM);
2025  		}
2026  	}
2027  	void putAVX_M_X()
2028  	{
2029  		const struct Tbl {
2030  			const char *name;
2031  			bool supportYMM;
2032  		} tbl[] = {
2033  			{ "vmovapd", true },
2034  			{ "vmovaps", true },
2035  			{ "vmovdqa", true },
2036  			{ "vmovdqu", true },
2037  			{ "vmovupd", true },
2038  			{ "vmovups", true },
2039  		};
2040  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2041  			const Tbl *p = &tbl[i];
2042  			put(p->name, MEM, XMM);
2043  			if (!p->supportYMM) continue;
2044  			put(p->name, MEM, YMM);
2045  		}
2046  	}
2047  	void putAVX_X_X_IMM_omit()
2048  	{
2049  		const struct Tbl {
2050  			const char *name;
2051  			bool support_Y_Y_X;
2052  		} tbl[] = {
2053  			{ "vpslldq", false },
2054  			{ "vpsrldq", false },
2055  			{ "vpsllw", true },
2056  			{ "vpslld", true },
2057  			{ "vpsllq", true },
2058  			{ "vpsraw", true },
2059  			{ "vpsrad", true },
2060  			{ "vpsrlw", true },
2061  			{ "vpsrld", true },
2062  			{ "vpsrlq", true },
2063  		};
2064  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2065  			const Tbl& p = tbl[i];
2066  			put(p.name, XMM, XMM, IMM);
2067  			put(p.name, YMM, YMM, IMM);
2068  			put(p.name, YMM, IMM);
2069  			put(p.name, _ZMM, _ZMM, IMM8);
2070  #ifdef XBYAK64
2071  			put(p.name, _XMM3, _XMM3, IMM8);
2072  			put(p.name, _YMM3, _YMM3, IMM8);
2073  #endif
2074  			if (p.support_Y_Y_X) {
2075  				put(p.name, YMM, YMM, XMM);
2076  			}
2077  		}
2078  	}
2079  	void putFMA()
2080  	{
2081  		const struct Tbl {
2082  			const char *name;
2083  			bool supportYMM;
2084  		} tbl[] = {
2085  			{ "vfmadd", true },
2086  			{ "vfmadd", false },
2087  			{ "vfmaddsub", true },
2088  			{ "vfmsubadd", true },
2089  			{ "vfmsub", true },
2090  			{ "vfmsub", false },
2091  			{ "vfnmadd", true },
2092  			{ "vfnmadd", false },
2093  			{ "vfnmsub", true },
2094  			{ "vfnmsub", false },
2095  		};
2096  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2097  			const Tbl& p = tbl[i];
2098  			const struct Ord {
2099  				const char *name;
2100  			} ord[] = {
2101  				{ "132" },
2102  				{ "213" },
2103  				{ "231" },
2104  			};
2105  			for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) {
2106  				const char sufTbl[][2][8] = {
2107  					{ "pd", "ps" },
2108  					{ "sd", "ss" },
2109  				};
2110  				for (size_t k = 0; k < 2; k++) {
2111  					const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k];
2112  					std::string name = std::string(p.name) + ord[j].name + suf;
2113  					const char *q = name.c_str();
2114  					put(q, XMM, XMM, XMM | MEM);
2115  					if (!p.supportYMM) continue;
2116  					put(q, YMM, YMM, YMM | MEM);
2117  				}
2118  			}
2119  		}
2120  	}
2121  	void putAVX2()
2122  	{
2123  		put("vextractps", REG32 | MEM, XMM, IMM);
2124  		put("vldmxcsr", MEM);
2125  		put("vstmxcsr", MEM);
2126  		put("vmaskmovdqu", XMM, XMM);
2127  
2128  		put("vmovd", XMM, REG32 | MEM);
2129  		put("vmovd", REG32 | MEM, XMM);
2130  
2131  		put("vmovq", XMM, XMM | MEM);
2132  		put("vmovq", MEM, XMM);
2133  
2134  		put("vmovhlps", XMM, XMM);
2135  		put("vmovhlps", XMM, XMM, XMM);
2136  		put("vmovlhps", XMM, XMM);
2137  		put("vmovlhps", XMM, XMM, XMM);
2138  
2139  		{
2140  			const char tbl[][16] = {
2141  				"vmovhpd",
2142  				"vmovhps",
2143  				"vmovlpd",
2144  				"vmovlps",
2145  			};
2146  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2147  				put(tbl[i], XMM, XMM, MEM);
2148  				put(tbl[i], XMM, MEM);
2149  				put(tbl[i], MEM, XMM);
2150  			}
2151  		}
2152  		put("vmovmskpd", REG32e, XMM | YMM);
2153  		put("vmovmskps", REG32e, XMM | YMM);
2154  
2155  		put("vmovntdq", MEM, XMM | YMM);
2156  		put("vmovntpd", MEM, XMM | YMM);
2157  		put("vmovntdqa", XMM | YMM, MEM);
2158  
2159  		{
2160  			const char tbl[][8] = { "vmovsd", "vmovss" };
2161  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2162  				put(tbl[i], XMM, XMM, XMM);
2163  				put(tbl[i], XMM, XMM | MEM);
2164  				put(tbl[i], MEM, XMM);
2165  			}
2166  		}
2167  		put("vpextrb", REG32e|MEM, XMM, IMM);
2168  		put("vpextrd", REG32|MEM, XMM, IMM);
2169  
2170  		for (int i = 0; i < 3; i++) {
2171  			const char tbl[][8] = { "vpinsrb", "vpinsrw", "vpinsrd" };
2172  			put(tbl[i], XMM, XMM, REG32|MEM, IMM);
2173  			put(tbl[i], XMM, REG32|MEM, IMM);
2174  		}
2175  
2176  		put("vpmovmskb", REG32e, XMM|YMM);
2177  
2178  		{
2179  			const struct Tbl {
2180  				const char *name;
2181  				bool supportYMM;
2182  			} tbl[] = {
2183  				{ "vblendvpd", true },
2184  				{ "vblendvps", true },
2185  				{ "vpblendvb", true },
2186  			};
2187  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2188  				const Tbl& p = tbl[i];
2189  				put(p.name, XMM, XMM, XMM | MEM, XMM);
2190  				put(p.name, XMM, XMM | MEM, XMM);
2191  				if (!p.supportYMM) continue;
2192  				put(p.name, YMM, YMM, YMM | MEM, YMM);
2193  				put(p.name, YMM, YMM | MEM, YMM);
2194  			}
2195  		}
2196  		// cvt
2197  		{
2198  			put("vcvtss2si", REG32e, XMM | MEM);
2199  			put("vcvttss2si", REG32e, XMM | MEM);
2200  			put("vcvtsd2si", REG32e, XMM | MEM);
2201  			put("vcvttsd2si", REG32e, XMM | MEM);
2202  
2203  			put("vcvtsi2ss", XMM, XMM, REG32e | MEM);
2204  			put("vcvtsi2ss", XMM, REG32e | MEM);
2205  
2206  			put("vcvtsi2sd", XMM, XMM, REG32e | MEM);
2207  			put("vcvtsi2sd", XMM, REG32e | MEM);
2208  #ifdef XBYAK64
2209  			put("vcvtsi2sd", XMM, XMM, MEM64);
2210  			put("vcvtsi2sd", XMM, MEM64);
2211  #endif
2212  
2213  			put("vcvtps2pd", XMM | YMM, XMM | MEM);
2214  			put("vcvtdq2pd", XMM | YMM, XMM | MEM);
2215  
2216  			put("vcvtpd2ps", XMM, XMM | YMM | MEM);
2217  			put("vcvtpd2dq", XMM, XMM | YMM | MEM);
2218  			put("vcvttpd2dq", XMM, XMM | YMM | MEM);
2219  
2220  			put("vcvtph2ps", XMM | YMM, XMM | MEM);
2221  			put("vcvtps2ph", XMM | MEM, XMM | YMM, IMM8);
2222  		}
2223  #ifdef XBYAK64
2224  		put("vmovq", XMM, REG64);
2225  		put("vmovq", REG64, XMM);
2226  
2227  		put("vpextrq", REG64|MEM, XMM, IMM);
2228  
2229  		put("vpinsrq", XMM, XMM, REG64|MEM, IMM);
2230  		put("vpinsrq", XMM, REG64|MEM, IMM);
2231  
2232  #endif
2233  	}
2234  	void putFMA2()
2235  	{
2236  #ifdef USE_YASM
2237  		put("vextractf128", XMM | MEM, YMM, IMM);
2238  		put("vextracti128", XMM | MEM, YMM, IMM);
2239  		put("vmaskmovps", MEM, YMM, YMM);
2240  		put("vmaskmovpd", MEM, YMM, YMM);
2241  		put("vlddqu", XMM | YMM, MEM);
2242  
2243  		put("vmovshdup", XMM, XMM | MEM);
2244  		put("vmovshdup", YMM, YMM | MEM);
2245  		put("vmovsldup", XMM, XMM | MEM);
2246  		put("vmovsldup", YMM, YMM | MEM);
2247  
2248  		// QQQ:nasm is wrong
2249  		put("vpcmpeqq", XMM, XMM | MEM);
2250  		put("vpcmpeqq", XMM, XMM, XMM | MEM);
2251  		put("vpcmpgtq", XMM, XMM | MEM);
2252  		put("vpcmpgtq", XMM, XMM, XMM | MEM);
2253  
2254  		put("vmovntps", MEM, XMM | YMM); // nasm error
2255  #else
2256  		put("vmaskmovps", XMM, XMM, MEM);
2257  		put("vmaskmovps", YMM, YMM, MEM);
2258  
2259  		put("vmaskmovpd", YMM, YMM, MEM);
2260  		put("vmaskmovpd", XMM, XMM, MEM);
2261  
2262  		put("vmaskmovps", MEM, XMM, XMM);
2263  		put("vmaskmovpd", MEM, XMM, XMM);
2264  #endif
2265  	}
2266  	void putCmp()
2267  	{
2268  		const char pred[32][16] = {
2269  			"eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
2270  			"eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
2271  			"true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
2272  			"eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
2273  		};
2274  		const char suf[][4] = { "pd", "ps", "sd", "ss" };
2275  		for (int i = 0; i < 4; i++) {
2276  			for (int j = 0; j < 32; j++) {
2277  				if (j < 8) {
2278  					put((std::string("cmp") + pred[j] + suf[i]).c_str(), XMM, XMM | MEM);
2279  				}
2280  				std::string str = std::string("vcmp") + pred[j] + suf[i];
2281  				const char *p = str.c_str();
2282  				put(p, XMM, XMM | MEM);
2283  				put(p, XMM, XMM, XMM | MEM);
2284  				if (i >= 2) continue;
2285  				put(p, YMM, YMM | MEM);
2286  				put(p, YMM, YMM, YMM | MEM);
2287  			}
2288  		}
2289  	}
2290  	void putRip()
2291  	{
2292  		const char tbl[][2][64] = {
2293  			{ "mov(byte [rip - 10], 3);dump();", "mov byte [rip - 10], 3" },
2294  			{ "mov(word [rip - 10], 3);dump();", "mov word [rip - 10], 3" },
2295  			{ "mov(dword[rip - 10], 3);dump();", "mov dword [rip - 10], 3" },
2296  			{ "mov(qword [rip - 10], 3);dump();", "mov qword [rip - 10], 3" },
2297  			{ "mov(ptr [rip - 10], al);dump();", "mov byte [rip - 10], al" },
2298  			{ "mov(ptr [rip - 10], ax);dump();", "mov word [rip - 10], ax" },
2299  			{ "mov(ptr [rip - 10], eax);dump();", "mov dword [rip - 10], eax" },
2300  			{ "mov(ptr [rip - 10], rax);dump();", "mov qword [rip - 10], rax" },
2301  		};
2302  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2303  			puts(tbl[i][isXbyak_ ? 0 : 1]);
2304  		}
2305  	}
2306  public:
2307  	Test(bool isXbyak)
2308  		: isXbyak_(isXbyak)
2309  		, funcNum_(1)
2310  	{
2311  		if (!isXbyak_) return;
2312  		printf("%s",
2313  			"    void gen0()\n"
2314  			"    {\n");
2315  	}
2316  	/*
2317  		gcc and vc give up to compile this source,
2318  		so I split functions.
2319  	*/
2320  	void separateFunc()
2321  	{
2322  		if (!isXbyak_) return;
2323  		printf(
2324  			"    }\n"
2325  			"    void gen%d()\n"
2326  			"    {\n", funcNum_++);
2327  	}
2328  	~Test()
2329  	{
2330  		if (!isXbyak_) return;
2331  		printf("%s",
2332  			"    }\n"
2333  			"    void gen()\n"
2334  			"    {\n");
2335  		for (int i = 0; i < funcNum_; i++) {
2336  			printf(
2337  			"        gen%d();\n", i);
2338  		}
2339  		printf(
2340  			"    }\n");
2341  	}
2342  	void putGprR_R_RM()
2343  	{
2344  		const char *tbl[] = {
2345  			"andn",
2346  			"mulx",
2347  			"pdep",
2348  			"pext",
2349  		};
2350  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2351  			const char *name = tbl[i];
2352  			put(name, REG32, REG32, REG32 | MEM);
2353  #ifdef XBYAK64
2354  			put(name, REG64, REG64, REG64 | MEM);
2355  #endif
2356  		}
2357  	}
2358  	void putGprR_RM_R()
2359  	{
2360  		const char *tbl[] = {
2361  			"bextr",
2362  			"bzhi",
2363  			"sarx",
2364  			"shlx",
2365  			"shrx",
2366  		};
2367  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2368  			const char *name = tbl[i];
2369  			put(name, REG32, REG32 | MEM, REG32);
2370  #ifdef XBYAK64
2371  			put(name, REG64, REG64 | MEM, REG64);
2372  #endif
2373  		}
2374  	}
2375  	void putGprR_RM()
2376  	{
2377  		const char *tbl[] = {
2378  			"blsi",
2379  			"blsmsk",
2380  			"blsr",
2381  		};
2382  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2383  			const char *name = tbl[i];
2384  			put(name, REG32, REG32 | MEM);
2385  #ifdef XBYAK64
2386  			put(name, REG64, REG64 | MEM);
2387  #endif
2388  		}
2389  	}
2390  	void putGprOtherwise()
2391  	{
2392  		put("rdrand", REG16 | REG32e);
2393  		put("rdseed", REG16 | REG32e);
2394  		put("rorx", REG32, REG32 | MEM, IMM8);
2395  #ifdef XBYAK64
2396  		put("rorx", REG64, REG64 | MEM, IMM8);
2397  #endif
2398  	}
2399  	void putGather()
2400  	{
2401  		const int y_vx_y = 0;
2402  		const int y_vy_y = 1;
2403  		const int x_vy_x = 2;
2404  		const struct Tbl {
2405  			const char *name;
2406  			int mode;
2407  		} tbl[] = {
2408  			{ "vgatherdpd", y_vx_y },
2409  			{ "vgatherqpd", y_vy_y },
2410  			{ "vgatherdps", y_vy_y },
2411  			{ "vgatherqps", x_vy_x },
2412  			{ "vpgatherdd", y_vy_y },
2413  			{ "vpgatherqd", x_vy_x },
2414  			{ "vpgatherdq", y_vx_y },
2415  			{ "vpgatherqq", y_vy_y },
2416  		};
2417  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2418  			const Tbl& p = tbl[i];
2419  			const char *name = p.name;
2420  			put(name, "xmm3", VM32X, "xmm5");
2421  			switch (p.mode) {
2422  			case y_vx_y:
2423  				put(name, "ymm3", VM32X, "ymm5");
2424  				break;
2425  			case y_vy_y:
2426  				put(name, "ymm3", VM32Y, "ymm5");
2427  				break;
2428  			case x_vy_x:
2429  				put(name, "xmm3", VM32Y, "xmm5");
2430  				break;
2431  			default:
2432  				printf("ERR mode=%d\n", p.mode);
2433  				exit(1);
2434  			}
2435  		}
2436  	}
2437  	void putGath(const std::string& vsib)
2438  	{
2439  		std::string x = "xmm1, ";
2440  		std::string a = std::string("[") + vsib + "], xmm3";
2441  		put("vgatherdpd", (x + "ptr" + a).c_str(), (x + a).c_str());
2442  	}
2443  
2444  	void putGatherAll()
2445  	{
2446  		const char *xmmTbl[] = {
2447  			"xmm2",
2448  			"xmm4",
2449  			"xmm2*1",
2450  			"xmm2*4",
2451  		};
2452  		for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2453  			std::string s = xmmTbl[i];
2454  			putGath(s);
2455  			putGath(s + "+3");
2456  			putGath(s + "+eax");
2457  			putGath("3+" + s);
2458  			putGath("eax+" + s);
2459  		}
2460  		for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2461  			int ord[] = { 0, 1, 2 };
2462  			do {
2463  				std::string s;
2464  				for (int j = 0; j < 3; j++) {
2465  					if (j > 0) s += '+';
2466  					switch (ord[j]) {
2467  					case 0: s += xmmTbl[i]; break;
2468  					case 1: s += "123"; break;
2469  					case 2: s += "ebp"; break;
2470  					}
2471  				}
2472  				putGath(s);
2473  			} while (std::next_permutation(ord, ord + 3));
2474  		}
2475  	}
2476  	void putSeg()
2477  	{
2478  		{
2479  			const char *segTbl[] = {
2480  				"es",
2481  				"cs",
2482  				"ss",
2483  				"ds",
2484  				"fs",
2485  				"gs",
2486  			};
2487  			for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2488  				const char *seg = segTbl[i];
2489  				const char *op1Tbl[] = {
2490  					"ax",
2491  					"edx",
2492  					(isXbyak_ ? "ptr [eax]" : "[eax]"),
2493  #ifdef XBYAK64
2494  					"r9",
2495  #endif
2496  				};
2497  				for (size_t j = 0; j < NUM_OF_ARRAY(op1Tbl); j++) {
2498  					const char *op1 = op1Tbl[j];
2499  					if (isXbyak_) {
2500  						printf("mov(%s, %s); dump();\n", op1, seg);
2501  						printf("mov(%s, %s); dump();\n", seg, op1);
2502  					} else {
2503  						printf("mov %s, %s\n", op1, seg);
2504  						printf("mov %s, %s\n", seg, op1);
2505  					}
2506  				}
2507  			}
2508  		}
2509  		{
2510  			const char *segTbl[] = {
2511  #ifdef XBYAK32
2512  				"es",
2513  				"ss",
2514  				"ds",
2515  #endif
2516  				"fs",
2517  				"gs",
2518  			};
2519  			for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2520  				const char *seg = segTbl[i];
2521  				if (isXbyak_) {
2522  					printf("push(%s); dump();\n", seg);
2523  					printf("pop(%s); dump();\n", seg);
2524  				} else {
2525  					printf("push %s\n", seg);
2526  					printf("pop %s\n", seg);
2527  				}
2528  			}
2529  		}
2530  	}
2531  	void put()
2532  	{
2533  #ifdef USE_AVX512
2534  		putAVX512();
2535  #else
2536  
2537  #ifdef USE_AVX
2538  
2539  		separateFunc();
2540  		putFMA2();
2541  
2542  #ifdef USE_YASM
2543  		putGprR_R_RM();
2544  		putGprR_RM_R();
2545  		putGprR_RM();
2546  		putGprOtherwise();
2547  		putGather();
2548  		putGatherAll();
2549  #else
2550  		putAVX1();
2551  		separateFunc();
2552  		putAVX2();
2553  		putAVX_X_X_XM_omit();
2554  		separateFunc();
2555  		putAVX_X_X_XM_IMM();
2556  		separateFunc();
2557  		putAVX_X_XM_IMM();
2558  		separateFunc();
2559  		putAVX_X_X_XM();
2560  		separateFunc();
2561  		putAVX_X_XM();
2562  		separateFunc();
2563  		putAVX_M_X();
2564  		putAVX_X_X_IMM_omit();
2565  		separateFunc();
2566  		putAVX_Y_XM();
2567  		separateFunc();
2568  		putFMA();
2569  		putSHA();
2570  #endif
2571  
2572  #else // USE_AVX
2573  
2574  		putJmp();
2575  		putFarJmp();
2576  
2577  #ifdef USE_YASM
2578  
2579  		putSSSE3();
2580  		putSSE4_1();
2581  		separateFunc();
2582  		putSSE4_2();
2583  		putSeg(); // same behavior as yasm for mov rax, cx
2584  		putPushPop8_16();
2585  #else
2586  		putSIMPLE();
2587  		putVpclmulqdq();
2588  		putReg1();
2589  		putBt();
2590  		putRorM();
2591  		separateFunc();
2592  		putPushPop();
2593  		putTest();
2594  		separateFunc();
2595  		putLoadSeg();
2596  		putEtc();
2597  		putShift();
2598  		putShxd();
2599  
2600  		separateFunc();
2601  
2602  		putBs();
2603  		putMMX1();
2604  		putMMX2();
2605  		separateFunc();
2606  		putMMX3();
2607  		putMMX4();
2608  		putMMX5();
2609  		separateFunc();
2610  		putXMM1();
2611  		putXMM2();
2612  		putXMM3();
2613  		putXMM4();
2614  		separateFunc();
2615  		putCmov();
2616  		putFpuMem16_32();
2617  		putFpuMem32_64();
2618  		separateFunc();
2619  		putFpuMem16_32_64();
2620  		putFpu();
2621  		putFpuFpu();
2622  		putCmp();
2623  		putMPX();
2624  #endif
2625  
2626  #if defined(XBYAK64) && !defined(__ILP32__)
2627  
2628  #ifdef USE_YASM
2629  		putRip();
2630  #else
2631  		putMov64();
2632  		putMovImm64();
2633  #endif
2634  
2635  #endif // XBYAK64
2636  
2637  #endif // USE_AVX
2638  
2639  #endif // USE_AVX512
2640  	}
2641  #ifdef USE_AVX512
2642  	void putOpmask()
2643  	{
2644  		{
2645  			const char *tbl[] = {
2646  				"kadd",
2647  				"kand",
2648  				"kandn",
2649  				"kor",
2650  				"kxnor",
2651  				"kxor",
2652  			};
2653  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2654  				std::string name = tbl[i];
2655  				put(name + "b", K, K, K);
2656  				put(name + "w", K, K, K);
2657  				put(name + "q", K, K, K);
2658  				put(name + "d", K, K, K);
2659  			}
2660  			put("kunpckbw", K, K, K);
2661  			put("kunpckwd", K, K, K);
2662  			put("kunpckdq", K, K, K);
2663  		}
2664  		{
2665  			const char *tbl[] = {
2666  				"knot",
2667  				"kortest",
2668  				"ktest",
2669  			};
2670  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2671  				std::string name = tbl[i];
2672  				put(name + "b", K, K);
2673  				put(name + "w", K, K);
2674  				put(name + "q", K, K);
2675  				put(name + "d", K, K);
2676  			}
2677  		}
2678  		{
2679  			const char *tbl[] = {
2680  				"kshiftl",
2681  				"kshiftr",
2682  			};
2683  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2684  				std::string name = tbl[i];
2685  				put(name + "b", K, K, IMM8);
2686  				put(name + "w", K, K, IMM8);
2687  				put(name + "q", K, K, IMM8);
2688  				put(name + "d", K, K, IMM8);
2689  			}
2690  		}
2691  		put("kmovw", K, K | MEM | REG32);
2692  		put("kmovq", K, K | MEM);
2693  		put("kmovb", K, K | MEM | REG32);
2694  		put("kmovd", K, K | MEM | REG32);
2695  
2696  		put("kmovw", MEM | REG32, K);
2697  		put("kmovq", MEM, K);
2698  		put("kmovb", MEM | REG32, K);
2699  		put("kmovd", MEM | REG32, K);
2700  #ifdef XBYAK64
2701  		put("kmovq", K, REG64);
2702  		put("kmovq", REG64, K);
2703  #endif
2704  	}
2705  	void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false, int sae = 0)
2706  	{
2707  		std::string modifier;
2708  		char pk[16] = "";
2709  		const char *pz = "";
2710  		const char *saeTblXbyak[] = { "", "|T_rn_sae", "|T_rd_sae", "|T_ru_sae", "|T_rz_sae" };
2711  		const char *saeTblNASM[] = { "", ",{rn-sae}", ",{rd-sae}", ",{ru-sae}", ",{rz-sae}" };
2712  		if (isXbyak_) {
2713  			if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx);
2714  			if (z) pz = "|T_z";
2715  			printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
2716  		} else {
2717  			if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
2718  			if (z && kIdx) pz = "{z}";
2719  			printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
2720  		}
2721  	}
2722  	void putCombi()
2723  	{
2724  		const char *xTbl[] = {
2725  			"xmm2",
2726  #ifdef XBYAK64
2727  			"xmm8", "xmm31"
2728  #else
2729  			"xmm5", "xmm6"
2730  #endif
2731  		};
2732  		const char *yTbl[] = {
2733  			"ymm0",
2734  #ifdef XBYAK64
2735  			"ymm15", "ymm31"
2736  #else
2737  			"ymm4", "ymm2"
2738  #endif
2739  		};
2740  		const char *zTbl[] = {
2741  			"zmm1",
2742  #ifdef XBYAK64
2743  			"zmm9", "zmm30"
2744  #else
2745  			"zmm3", "zmm7"
2746  #endif
2747  		};
2748  		const size_t N = NUM_OF_ARRAY(zTbl);
2749  		for (size_t i = 0; i < N; i++) {
2750  			for (size_t j = 0; j < N; j++) {
2751  				separateFunc();
2752  				for (size_t k = 0; k < N; k++) {
2753  #ifdef XBYAK64
2754  					for (int kIdx = 0; kIdx < 8; kIdx++) {
2755  						put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx);
2756  						put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx);
2757  						for (int z = 0; z < 2; z++) {
2758  							for (int sae = 0; sae < 5; sae++) {
2759  								put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae);
2760  							}
2761  						}
2762  					}
2763  #else
2764  					put_vaddpd(xTbl[i], xTbl[j], xTbl[k]);
2765  					put_vaddpd(yTbl[i], yTbl[j], yTbl[k]);
2766  					for (int sae = 0; sae < 5; sae++) {
2767  						put_vaddpd(zTbl[i], zTbl[j], zTbl[k], sae);
2768  					}
2769  #endif
2770  				}
2771  			}
2772  		}
2773  		put("vaddpd", XMM, XMM, _MEM);
2774  		put("vaddpd", YMM, YMM, _MEM);
2775  		put("vaddpd", ZMM, ZMM, _MEM);
2776  	}
2777  	void putCmpK()
2778  	{
2779  		{
2780  			const struct Tbl {
2781  				const char *name;
2782  				bool supportYMM;
2783  			} tbl[] = {
2784  				{ "vcmppd", true },
2785  				{ "vcmpps", true },
2786  				{ "vcmpsd", false },
2787  				{ "vcmpss", false },
2788  			};
2789  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2790  				const Tbl *p = &tbl[i];
2791  				put(p->name, K, _XMM, _XMM | MEM, IMM);
2792  				if (!p->supportYMM) continue;
2793  				put(p->name, K, _YMM, _YMM | MEM, IMM);
2794  				put(p->name, K, _ZMM, _ZMM | MEM, IMM);
2795  			}
2796  		}
2797  		put("vcmppd", K2, ZMM, ZMM_SAE, IMM);
2798  #ifdef XBYAK64
2799  		{
2800  			const struct Tbl {
2801  				const char *name;
2802  			} tbl[] = {
2803  				{ "vcomisd" },
2804  				{ "vcomiss" },
2805  				{ "vucomisd" },
2806  				{ "vucomiss" },
2807  			};
2808  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2809  				const Tbl *p = &tbl[i];
2810  				put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM);
2811  			}
2812  		}
2813  		put("vcomiss", _XMM3, XMM | MEM);
2814  		put("vcomiss", XMM, XMM_SAE);
2815  #endif
2816  	}
2817  	void putBroadcastSub(int idx, int disp)
2818  	{
2819  #ifdef XBYAK64
2820  		const char *a = "rax";
2821  #else
2822  		const char *a = "eax";
2823  #endif
2824  		if (isXbyak_) {
2825  			printf("vaddpd(zmm%d, zmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2826  			printf("vaddpd(ymm%d, ymm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2827  			printf("vaddpd(xmm%d, xmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2828  		} else {
2829  			printf("vaddpd zmm%d, zmm1, [%s+%d]{1to8}\n", idx, a, disp);
2830  			printf("vaddpd ymm%d, ymm1, [%s+%d]{1to4}\n", idx, a, disp);
2831  			printf("vaddpd xmm%d, xmm1, [%s+%d]{1to2}\n", idx, a, disp);
2832  		}
2833  	}
2834  	void putBroadcast()
2835  	{
2836  		for (int i = 0; i < 9; i++) {
2837  			putBroadcastSub(0, i);
2838  #ifdef XBYAK64
2839  			putBroadcastSub(10, i);
2840  			putBroadcastSub(20, i);
2841  #endif
2842  		}
2843  		put("vpbroadcastb", XMM_KZ | ZMM_KZ, REG8);
2844  		put("vpbroadcastw", XMM_KZ | ZMM_KZ, REG16);
2845  		put("vpbroadcastd", XMM_KZ | ZMM_KZ, REG32);
2846  #ifdef XBYAK64
2847  		put("vpbroadcastq", XMM_KZ | ZMM_KZ, REG64);
2848  #endif
2849  		{
2850  			const char *tbl[] = {
2851  				"vpbroadcastb",
2852  				"vpbroadcastw",
2853  				"vpbroadcastd",
2854  				"vpbroadcastq",
2855  			};
2856  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2857  				put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
2858  			}
2859  		}
2860  		put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
2861  		put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
2862  		put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
2863  		put("vbroadcasti32x8", ZMM_KZ, _MEM);
2864  		put("vbroadcasti64x4", ZMM_KZ, _MEM);
2865  	}
2866  	void putAVX512_M_X()
2867  	{
2868  		const char *tbl[] = {
2869  			"vmovapd",
2870  			"vmovaps",
2871  			"vmovupd",
2872  			"vmovups",
2873  		};
2874  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2875  			const char *name = tbl[i];
2876  			put(name, MEM|MEM_K, ZMM|XMM|YMM);
2877  			put(name, ZMM, MEM);
2878  		}
2879  	}
2880  	void put_vmov()
2881  	{
2882  #ifdef XBYAK64
2883  		put("vmovd", _XMM3, MEM|REG32);
2884  		put("vmovd", MEM|REG32, _XMM3);
2885  		put("vmovq", _XMM3, MEM|REG64|XMM);
2886  		put("vmovq", MEM|REG64|XMM, _XMM3);
2887  		put("vmovhlps", _XMM3, _XMM3, _XMM3);
2888  		put("vmovlhps", _XMM3, _XMM3, _XMM3);
2889  		put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM);
2890  		put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM);
2891  		put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM);
2892  		put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM);
2893  
2894  		put("vmovsd", XMM_KZ, _XMM3, _XMM3);
2895  		put("vmovsd", XMM_KZ, MEM);
2896  		put("vmovsd", MEM_K, XMM);
2897  		put("vmovss", XMM_KZ, _XMM3, _XMM3);
2898  		put("vmovss", XMM_KZ, MEM);
2899  		put("vmovss", MEM_K, XMM);
2900  
2901  		put("vmovshdup", _ZMM, _ZMM);
2902  		put("vmovsldup", _ZMM, _ZMM);
2903  
2904  
2905  		{
2906  			const char *tbl[] = {
2907  				"valignd",
2908  				"valignq",
2909  			};
2910  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2911  				const char *name = tbl[i];
2912  				put(name, XMM_KZ, _XMM, _XMM | MEM, IMM);
2913  				put(name, _YMM3, _YMM3, _YMM3, IMM);
2914  				put(name, _ZMM, _ZMM, _ZMM, IMM);
2915  			}
2916  		}
2917  		{
2918  			const char tbl[][16] = {
2919  				"vmovhpd",
2920  				"vmovhps",
2921  				"vmovlpd",
2922  				"vmovlps",
2923  			};
2924  			for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2925  				put(tbl[i], _XMM3, _XMM3, MEM);
2926  				put(tbl[i], MEM, _XMM3);
2927  			}
2928  		}
2929  #endif
2930  	}
2931  	void put512_X_XM()
2932  	{
2933  		const struct Tbl {
2934  			const char *name;
2935  			bool M_X;
2936  		} tbl[] = {
2937  			{ "vmovddup", false },
2938  			{ "vmovdqa32", true },
2939  			{ "vmovdqa64", true },
2940  			{ "vmovdqu8", true },
2941  			{ "vmovdqu16", true },
2942  			{ "vmovdqu32", true },
2943  			{ "vmovdqu64", true },
2944  			{ "vpabsb", false },
2945  			{ "vpabsw", false },
2946  			{ "vpabsd", false },
2947  			{ "vpabsq", false },
2948  		};
2949  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2950  			const Tbl& p = tbl[i];
2951  			put(p.name, _XMM|XMM_KZ, _XMM|MEM);
2952  			put(p.name, _YMM|YMM_KZ, _YMM|MEM);
2953  			put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
2954  			if (!p.M_X) continue;
2955  			put(p.name, MEM, _XMM);
2956  			put(p.name, MEM, _YMM);
2957  			put(p.name, MEM, _ZMM);
2958  		}
2959  		put("vsqrtpd", XMM_KZ, M_1to2);
2960  		put("vsqrtpd", YMM_KZ, M_1to4);
2961  		put("vsqrtpd", ZMM_KZ, M_1to8);
2962  		put("vsqrtpd", ZMM_KZ, ZMM_ER);
2963  
2964  		put("vsqrtps", XMM_KZ, M_1to4);
2965  		put("vsqrtps", YMM_KZ, M_1to8);
2966  		put("vsqrtps", ZMM_KZ, M_1to16);
2967  		put("vsqrtps", ZMM_KZ, ZMM_ER);
2968  
2969  		put("vpabsd", ZMM_KZ, M_1to16);
2970  		put("vpabsq", ZMM_KZ, M_1to8);
2971  
2972  		put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM);
2973  		put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
2974  
2975  		put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
2976  		put("vbroadcastf64x4", ZMM_KZ, _MEM);
2977  	}
2978  	void put512_X_X_XM()
2979  	{
2980  		const struct Tbl {
2981  			const char *name;
2982  			uint64_t mem;
2983  		} tbl[] = {
2984  			{ "vsqrtsd", MEM },
2985  			{ "vsqrtss", MEM },
2986  			{ "vunpckhpd", M_1to2 },
2987  			{ "vunpckhps", M_1to4 },
2988  			{ "vunpcklpd", M_1to2 },
2989  			{ "vunpcklps", M_1to4 },
2990  		};
2991  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2992  			const Tbl& p = tbl[i];
2993  			put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
2994  		}
2995  	}
2996  	void put512_X3()
2997  	{
2998  #ifdef XBYAK64
2999  		const struct Tbl {
3000  			const char *name;
3001  			uint64_t x1;
3002  			uint64_t x2;
3003  			uint64_t xm;
3004  		} tbl[] = {
3005  			{ "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM },
3006  			{ "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
3007  			{ "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
3008  
3009  			{ "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 },
3010  			{ "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 },
3011  			{ "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
3012  
3013  			{ "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 },
3014  			{ "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 },
3015  			{ "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
3016  
3017  			{ "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
3018  			{ "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
3019  			{ "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
3020  
3021  			{ "vpaddb", XMM_KZ, _XMM, _XMM | _MEM },
3022  			{ "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
3023  			{ "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 },
3024  			{ "vpaddq", ZMM_KZ, _ZMM, M_1to8 },
3025  
3026  			{ "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM },
3027  			{ "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
3028  
3029  			{ "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM },
3030  			{ "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3031  
3032  			{ "vpaddusb", XMM_KZ, _XMM, _XMM | MEM },
3033  			{ "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },
3034  
3035  			{ "vpaddusw", XMM_KZ, _XMM, _XMM | MEM },
3036  			{ "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },
3037  
3038  			{ "vpsubb", XMM_KZ, _XMM, _XMM | _MEM },
3039  			{ "vpsubw", XMM_KZ, _XMM, _XMM | _MEM },
3040  			{ "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 },
3041  			{ "vpsubq", ZMM_KZ, _ZMM, M_1to8 },
3042  
3043  			{ "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM },
3044  			{ "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
3045  
3046  			{ "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM },
3047  			{ "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3048  
3049  			{ "vpsubusb", XMM_KZ, _XMM, _XMM | MEM },
3050  			{ "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },
3051  
3052  			{ "vpsubusw", XMM_KZ, _XMM, _XMM | MEM },
3053  			{ "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },
3054  
3055  			{ "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
3056  			{ "vpandq", ZMM_KZ, _ZMM, _ZMM | M_1to8 },
3057  
3058  			{ "vpandnd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
3059  			{ "vpandnq", ZMM_KZ, _ZMM, _ZMM | M_1to8 },
3060  
3061  			{ "vpavgb", ZMM_KZ, _ZMM, _ZMM },
3062  			{ "vpavgw", ZMM_KZ, _ZMM, _ZMM },
3063  
3064  			{ "vpcmpeqb", K2, _ZMM, _ZMM | _MEM },
3065  			{ "vpcmpeqw", K2, _ZMM, _ZMM | _MEM },
3066  			{ "vpcmpeqd", K2, _ZMM, _ZMM | M_1to16 },
3067  			{ "vpcmpeqq", K2, _ZMM, _ZMM | M_1to8 },
3068  
3069  			{ "vpcmpgtb", K2, _ZMM, _ZMM | _MEM },
3070  			{ "vpcmpgtw", K2, _ZMM, _ZMM | _MEM },
3071  			{ "vpcmpgtd", K2, _ZMM, _ZMM | M_1to16 },
3072  			{ "vpcmpgtq", K2, _ZMM, _ZMM | M_1to8 },
3073  
3074  			{ "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3075  			{ "vpmaddwd", ZMM_KZ, _ZMM, _ZMM | _MEM },
3076  
3077  			{ "vpmaxsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
3078  			{ "vpmaxsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3079  			{ "vpmaxsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
3080  			{ "vpmaxsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
3081  
3082  			{ "vpmaxub", ZMM_KZ, _ZMM, _ZMM | _MEM },
3083  			{ "vpmaxuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3084  			{ "vpmaxud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
3085  			{ "vpmaxuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
3086  
3087  			{ "vpminsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
3088  			{ "vpminsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3089  			{ "vpminsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
3090  			{ "vpminsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
3091  
3092  			{ "vpminub", ZMM_KZ, _ZMM, _ZMM | _MEM },
3093  			{ "vpminuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3094  			{ "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
3095  			{ "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
3096  
3097  			{ "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 },
3098  			{ "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
3099  			{ "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },
3100  
3101  			{ "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 },
3102  			{ "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
3103  			{ "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },
3104  
3105  			{ "vpsraw", XMM_KZ, _XMM, IMM8 },
3106  			{ "vpsraw", ZMM_KZ, _ZMM, IMM8 },
3107  
3108  			{ "vpsrad", XMM_KZ, _XMM | M_1to4, IMM8 },
3109  			{ "vpsrad", ZMM_KZ, _ZMM | M_1to16, IMM8 },
3110  
3111  			{ "vpsraq", XMM, XMM, IMM8 },
3112  			{ "vpsraq", XMM_KZ, _XMM | M_1to2, IMM8 },
3113  			{ "vpsraq", ZMM_KZ, _ZMM | M_1to8, IMM8 },
3114  
3115  			{ "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 },
3116  			{ "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 },
3117  			{ "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
3118  
3119  			{ "vpsrlw", XMM_KZ, _XMM, IMM8 },
3120  			{ "vpsrlw", ZMM_KZ, _ZMM, IMM8 },
3121  
3122  			{ "vpsrld", XMM_KZ, _XMM | M_1to4, IMM8 },
3123  			{ "vpsrld", ZMM_KZ, _ZMM | M_1to16, IMM8 },
3124  
3125  			{ "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
3126  			{ "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },
3127  
3128  			{ "vpsravw", XMM_KZ | _XMM, _XMM, _XMM },
3129  			{ "vpsravw", _ZMM, _ZMM, _MEM },
3130  
3131  			{ "vpsravd", XMM_KZ | _XMM, _XMM, _XMM },
3132  			{ "vpsravd", _ZMM, _ZMM, M_1to16 },
3133  
3134  			{ "vpsravq", XMM_KZ | _XMM, _XMM, _XMM },
3135  			{ "vpsravq", _ZMM, _ZMM, M_1to8 },
3136  
3137  			{ "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM },
3138  			{ "vpsllvw", _ZMM, _ZMM, _MEM },
3139  
3140  			{ "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM },
3141  			{ "vpsllvd", _ZMM, _ZMM, M_1to16 },
3142  
3143  			{ "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM },
3144  			{ "vpsllvq", _ZMM, _ZMM, M_1to8 },
3145  
3146  			{ "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM },
3147  			{ "vpsrlvw", _ZMM, _ZMM, _MEM },
3148  
3149  			{ "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM },
3150  			{ "vpsrlvd", _ZMM, _ZMM, M_1to16 },
3151  
3152  			{ "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM },
3153  			{ "vpsrlvq", _ZMM, _ZMM, M_1to8 },
3154  
3155  			{ "vpshufb", _XMM | XMM_KZ, _XMM, _XMM },
3156  			{ "vpshufb", ZMM_KZ, _ZMM, _MEM },
3157  
3158  			{ "vpshufhw", _XMM | XMM_KZ, _XMM, IMM8 },
3159  			{ "vpshufhw", ZMM_KZ, _MEM, IMM8 },
3160  
3161  			{ "vpshuflw", _XMM | XMM_KZ, _XMM, IMM8 },
3162  			{ "vpshuflw", ZMM_KZ, _MEM, IMM8 },
3163  
3164  			{ "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4, IMM8 },
3165  			{ "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16, IMM8 },
3166  
3167  			{ "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 },
3168  			{ "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 },
3169  
3170  			{ "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 },
3171  			{ "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 },
3172  
3173  			{ "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 },
3174  			{ "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 },
3175  
3176  			{ "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 },
3177  			{ "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 },
3178  
3179  			{ "vpsadbw", _XMM3, _XMM, _XMM },
3180  			{ "vpsadbw", _ZMM, _ZMM, _MEM },
3181  
3182  			{ "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 },
3183  			{ "vpmuldq", ZMM_KZ, _ZMM, M_1to8 },
3184  
3185  			{ "vpmulhrsw", _XMM3, _XMM, _XMM },
3186  			{ "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },
3187  
3188  			{ "vpmulhuw", _XMM3, _XMM, _XMM },
3189  			{ "vpmulhuw", ZMM_KZ, _ZMM, _MEM },
3190  
3191  			{ "vpmulhw", _XMM3, _XMM, _XMM },
3192  			{ "vpmulhw", ZMM_KZ, _ZMM, _MEM },
3193  
3194  			{ "vpmullw", _XMM3, _XMM, _XMM },
3195  			{ "vpmullw", ZMM_KZ, _ZMM, _MEM },
3196  
3197  			{ "vpmulld", _XMM3, _XMM, M_1to4 },
3198  			{ "vpmulld", ZMM_KZ, _ZMM, M_1to16 },
3199  
3200  			{ "vpmullq", _XMM3, _XMM, M_1to2 },
3201  			{ "vpmullq", ZMM_KZ, _ZMM, M_1to8 },
3202  
3203  			{ "vpmuludq", _XMM3, _XMM, M_1to2 },
3204  			{ "vpmuludq", ZMM_KZ, _ZMM, M_1to8 },
3205  
3206  			{ "vpunpckhbw", _XMM3, _XMM, _XMM },
3207  			{ "vpunpckhbw", _ZMM, _ZMM, _MEM },
3208  
3209  			{ "vpunpckhwd", _XMM3, _XMM, _XMM },
3210  			{ "vpunpckhwd", _ZMM, _ZMM, _MEM },
3211  
3212  			{ "vpunpckhdq", _XMM3, _XMM, M_1to4 },
3213  			{ "vpunpckhdq", _ZMM, _ZMM, M_1to16 },
3214  
3215  			{ "vpunpckhqdq", _XMM3, _XMM, M_1to2 },
3216  			{ "vpunpckhqdq", _ZMM, _ZMM, M_1to8 },
3217  
3218  			{ "vpunpcklbw", _XMM3, _XMM, _XMM },
3219  			{ "vpunpcklbw", _ZMM, _ZMM, _MEM },
3220  
3221  			{ "vpunpcklwd", _XMM3, _XMM, _XMM },
3222  			{ "vpunpcklwd", _ZMM, _ZMM, _MEM },
3223  
3224  			{ "vpunpckldq", _XMM3, _XMM, M_1to4 },
3225  			{ "vpunpckldq", _ZMM, _ZMM, M_1to16 },
3226  
3227  			{ "vpunpcklqdq", _XMM3, _XMM, M_1to2 },
3228  			{ "vpunpcklqdq", _ZMM, _ZMM, M_1to8 },
3229  
3230  			{ "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
3231  			{ "vextractf64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
3232  			{ "vextractf32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
3233  			{ "vextractf64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
3234  
3235  			{ "vextracti32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
3236  			{ "vextracti64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
3237  			{ "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
3238  			{ "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
3239  
3240  			{ "vextractps", REG32 | _MEM, _XMM3, IMM8 },
3241  
3242  			{ "vpermb", XMM_KZ, _XMM, _XMM },
3243  			{ "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },
3244  
3245  			{ "vpermw", XMM_KZ, _XMM, _XMM },
3246  			{ "vpermw", ZMM_KZ, _ZMM, _ZMM | _MEM },
3247  
3248  			{ "vpermd", YMM_KZ, _YMM, _YMM | M_1to8 },
3249  			{ "vpermd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
3250  
3251  			{ "vpermilpd", XMM_KZ, _XMM, _XMM | M_1to2 },
3252  			{ "vpermilpd", ZMM_KZ, _ZMM, M_1to8 },
3253  			{ "vpermilpd", XMM_KZ, M_1to2, IMM8 },
3254  			{ "vpermilpd", ZMM_KZ, M_1to8, IMM8 },
3255  
3256  			{ "vpermilps", XMM_KZ, _XMM, _XMM | M_1to4 },
3257  			{ "vpermilps", ZMM_KZ, _ZMM, M_1to16 },
3258  			{ "vpermilps", XMM_KZ, M_1to4, IMM8 },
3259  			{ "vpermilps", ZMM_KZ, M_1to16, IMM8 },
3260  
3261  			{ "vpermpd", YMM_KZ, _YMM | M_1to4, IMM8 },
3262  			{ "vpermpd", ZMM_KZ, _ZMM | M_1to8, IMM8 },
3263  			{ "vpermpd", YMM_KZ, _YMM, M_1to4 },
3264  			{ "vpermpd", ZMM_KZ, _ZMM, M_1to8 },
3265  
3266  			{ "vpermps", YMM_KZ, _YMM, M_1to8 },
3267  			{ "vpermps", ZMM_KZ, _ZMM, M_1to16 },
3268  
3269  			{ "vpermq", YMM_KZ, _YMM | M_1to4, IMM8 },
3270  			{ "vpermq", ZMM_KZ, _ZMM | M_1to8, IMM8 },
3271  			{ "vpermq", YMM_KZ, _YMM, M_1to4 },
3272  			{ "vpermq", ZMM_KZ, _ZMM, M_1to8 },
3273  		};
3274  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3275  			const Tbl& p = tbl[i];
3276  			put(p.name, p.x1, p.x2, p.xm);
3277  		}
3278  #endif
3279  	}
3280  	void put512_X3_I()
3281  	{
3282  		const struct Tbl {
3283  			const char *name;
3284  			uint64_t x1;
3285  			uint64_t x2;
3286  			uint64_t xm;
3287  		} tbl[] = {
3288  #ifdef XBYAK64
3289  			{ "vinsertps", _XMM, _XMM, _XMM3 },
3290  
3291  			{ "vshufpd", XMM_KZ, _XMM, M_1to2 },
3292  			{ "vshufpd", ZMM_KZ, _ZMM, M_1to8 },
3293  
3294  			{ "vshufps", XMM_KZ, _XMM, M_1to4 },
3295  			{ "vshufps", ZMM_KZ, _ZMM, M_1to16 },
3296  
3297  			{ "vinsertf32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3298  			{ "vinsertf32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3299  
3300  			{ "vinsertf64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3301  			{ "vinsertf64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3302  
3303  			{ "vinsertf32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3304  			{ "vinsertf64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3305  
3306  			{ "vinserti32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3307  			{ "vinserti32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3308  
3309  			{ "vinserti64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3310  			{ "vinserti64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3311  
3312  			{ "vinserti32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3313  			{ "vinserti64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3314  #endif
3315  			{ "vpalignr", ZMM_KZ, _ZMM, _ZMM },
3316  		};
3317  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3318  			const Tbl& p = tbl[i];
3319  			put(p.name, p.x1, p.x2, p.xm, IMM8);
3320  		}
3321  #ifdef XBYAK64
3322  		put("vpextrb", _REG64, _XMM3, IMM8);
3323  		put("vpextrw", _REG64|MEM, _XMM3, IMM8);
3324  		put("vpextrd", _REG32, _XMM3, IMM8);
3325  		put("vpextrq", _REG64, _XMM3, IMM8);
3326  		put("vpinsrb", _XMM3, _XMM3, _REG32, IMM8);
3327  		put("vpinsrw", _XMM3, _XMM3, _REG32, IMM8);
3328  		put("vpinsrd", _XMM3, _XMM3, _REG32, IMM8);
3329  		put("vpinsrq", _XMM3, _XMM3, _REG64, IMM8);
3330  #endif
3331  	}
3332  	void put512_FMA()
3333  	{
3334  		const struct Tbl {
3335  			const char *name;
3336  			bool supportYMM;
3337  		} tbl[] = {
3338  			{ "vfmadd", true },
3339  			{ "vfmadd", false },
3340  			{ "vfmaddsub", true },
3341  			{ "vfmsubadd", true },
3342  			{ "vfmsub", true },
3343  			{ "vfmsub", false },
3344  			{ "vfnmadd", true },
3345  			{ "vfnmadd", false },
3346  			{ "vfnmsub", true },
3347  			{ "vfnmsub", false },
3348  		};
3349  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3350  			const Tbl& p = tbl[i];
3351  			const struct Ord {
3352  				const char *name;
3353  			} ord[] = {
3354  				{ "132" },
3355  				{ "213" },
3356  				{ "231" },
3357  			};
3358  			for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) {
3359  				const char sufTbl[][2][8] = {
3360  					{ "pd", "ps" },
3361  					{ "sd", "ss" },
3362  				};
3363  				for (size_t k = 0; k < 2; k++) {
3364  					const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k];
3365  					uint64_t mem = 0;
3366  					if (suf == "pd") {
3367  						mem = M_1to2;
3368  					} else if (suf == "ps") {
3369  						mem = M_1to4;
3370  					} else {
3371  						mem = XMM_ER;
3372  					}
3373  					std::string name = std::string(p.name) + ord[j].name + suf;
3374  					const char *q = name.c_str();
3375  					put(q, XMM_KZ, _XMM, mem);
3376  					if (!p.supportYMM) continue;
3377  					if (suf == "pd") {
3378  						mem = M_1to8;
3379  					} else if (suf == "ps") {
3380  						mem = M_1to16;
3381  					} else {
3382  						mem = XMM_ER;
3383  					}
3384  					put(q, _ZMM, _ZMM, mem);
3385  				}
3386  			}
3387  		}
3388  	}
3389  	void put512_Y_XM()
3390  	{
3391  		const char *tbl[] = {
3392  			"vpmovsxbw",
3393  			"vpmovsxbd",
3394  			"vpmovsxbq",
3395  			"vpmovsxwd",
3396  			"vpmovsxwq",
3397  			"vpmovsxdq",
3398  			"vpmovzxbw",
3399  			"vpmovzxbd",
3400  			"vpmovzxbq",
3401  			"vpmovzxwd",
3402  			"vpmovzxwq",
3403  			"vpmovzxdq",
3404  		};
3405  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3406  			const char *name = tbl[i];
3407  			put(name, XMM_KZ, _XMM);
3408  			put(name, _ZMM, _MEM);
3409  		}
3410  	}
3411  	void put512_AVX1()
3412  	{
3413  #ifdef XBYAK64
3414  		const struct Tbl {
3415  			std::string name;
3416  			bool only_pd_ps;
3417  		} tbl[] = {
3418  			{ "vadd", false },
3419  			{ "vsub", false },
3420  			{ "vmul", false },
3421  			{ "vdiv", false },
3422  			{ "vmax", false },
3423  			{ "vmin", false },
3424  			{ "vand", true },
3425  			{ "vandn", true },
3426  			{ "vor", true },
3427  			{ "vxor", true },
3428  		};
3429  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3430  			const struct Suf {
3431  				const char *suf;
3432  				bool supportYMM;
3433  			} sufTbl[] = {
3434  				{ "pd", true },
3435  				{ "ps", true },
3436  				{ "sd", false },
3437  				{ "ss", false },
3438  			};
3439  			for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
3440  				if (tbl[i].only_pd_ps && j == 2) break;
3441  				std::string suf = sufTbl[j].suf;
3442  				std::string name = tbl[i].name + suf;
3443  				const char *p = name.c_str();
3444  				uint64_t mem = 0;
3445  				if (suf == "pd") {
3446  					mem = M_1to2;
3447  				} else if (suf == "ps") {
3448  					mem = M_1to4;
3449  				}
3450  				put(p, _XMM3 | XMM_KZ, _XMM, mem);
3451  				if (!sufTbl[j].supportYMM) continue;
3452  				mem = 0;
3453  				if (suf == "pd") {
3454  					mem = M_1to8;
3455  				} else if (suf == "ps") {
3456  					mem = M_1to16;
3457  				}
3458  				put(p, _ZMM, _ZMM, mem);
3459  			}
3460  		}
3461  #endif
3462  	}
3463  	void put512_cvt()
3464  	{
3465  #ifdef XBYAK64
3466  		put("vcvtdq2pd", XMM_KZ, _XMM | M_1to2);
3467  		put("vcvtdq2pd", YMM_KZ, _XMM | M_1to4);
3468  		put("vcvtdq2pd", ZMM_KZ, _YMM | M_1to8);
3469  
3470  		put("vcvtdq2ps", XMM_KZ, _XMM | M_1to4);
3471  		put("vcvtdq2ps", YMM_KZ, _YMM | M_1to8);
3472  		put("vcvtdq2ps", ZMM_KZ, _ZMM | M_1to16);
3473  
3474  		put("vcvtpd2dq", XMM_KZ, _XMM | _YMM | M_1to2);
3475  		put("vcvtpd2dq", YMM_KZ, _ZMM | ZMM_ER | M_1to8);
3476  #endif
3477  	}
3478  	void putMin()
3479  	{
3480  #ifdef XBYAK64
3481  		put("vcvtpd2dq", _XMM | _XMM3, _XMM | M_xword | M_1to2);
3482  		put("vcvtpd2dq", _XMM | _XMM3, _YMM | M_yword | MY_1to4);
3483  #endif
3484  	}
3485  	void putAVX512()
3486  	{
3487  #ifdef MIN_TEST
3488  		putMin();
3489  #else
3490  		putOpmask();
3491  		separateFunc();
3492  		putCombi();
3493  		separateFunc();
3494  		putCmpK();
3495  		separateFunc();
3496  		putBroadcast();
3497  		separateFunc();
3498  		putAVX512_M_X();
3499  		separateFunc();
3500  		put_vmov();
3501  		separateFunc();
3502  		put512_X_XM();
3503  		separateFunc();
3504  		put512_X_X_XM();
3505  		separateFunc();
3506  		put512_X3();
3507  		separateFunc();
3508  		put512_X3_I();
3509  		separateFunc();
3510  		put512_FMA();
3511  		separateFunc();
3512  		put512_Y_XM();
3513  		separateFunc();
3514  		put512_AVX1();
3515  		separateFunc();
3516  		put512_cvt();
3517  #endif
3518  	}
3519  #endif
3520  };
3521  
3522  int main(int argc, char *[])
3523  {
3524  	Test test(argc > 1);
3525  	test.put();
3526  }