/ externals / xbyak / gen / gen_code.cpp
gen_code.cpp
   1  #define XBYAK_DONT_READ_LIST
   2  #include <stdio.h>
   3  #include <string.h>
   4  #include "xbyak/xbyak.h"
   5  #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
   6  
   7  using namespace Xbyak;
   8  #ifdef _MSC_VER
   9  	#pragma warning(disable : 4996) // scanf
  10  	#define snprintf _snprintf_s
  11  #endif
  12  
  13  #include "avx_type.hpp"
  14  /*
  15  	reg = cx/ecx/rcx
  16  	insert 0x67 if prefix is true
  17  */
  18  void put_jREGz(const char *reg, bool prefix)
  19  {
  20  	printf("void j%sz(std::string label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? "db(0x67); " : "");
  21  	printf("void j%sz(const Label& label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? "db(0x67); " : "");
  22  }
  23  
/*
	one row of a table of fixed byte-sequence instructions, printed by putGeneric()
	rows are brace-initialized with trailing bytes omitted, so unused
	code2..code4 are zero
*/
struct GenericTbl {
	const char *name; // name of the generated member function
	uint8_t code1; // first byte, always emitted
	uint8_t code2; // second byte, emitted only if non-zero
	uint8_t code3; // third byte, emitted only if non-zero
	uint8_t code4; // fourth byte, emitted only if non-zero
};
  31  
  32  void putGeneric(const GenericTbl *p, size_t n)
  33  {
  34  	for (size_t i = 0; i < n; i++) {
  35  		printf("void %s() { db(0x%02X); ", p->name, p->code1);
  36  		if (p->code2) printf("db(0x%02X); ", p->code2);
  37  		if (p->code3) printf("db(0x%02X); ", p->code3);
  38  		if (p->code4) printf("db(0x%02X); ", p->code4);
  39  		printf("}\n");
  40  		p++;
  41  	}
  42  }
  43  
/*
	print the member functions for instructions of the form
	(x, x, x/m[, imm]) / (y, y, y/m[, imm])
	omitOnly = true  : print only the 2-operand shortcuts v<name>(x, op[, imm]),
	                   which forward to v<name>(x, x, op[, imm]), for rows with enableOmit
	omitOnly = false : print the SSE form <name>(xmm, op[, imm]) for rows with mode bit 0
	                   and the AVX form v<name>(x1, x2, op[, imm]) for rows with mode bit 1
*/
void putX_X_XM(bool omitOnly)
{
	// (x, x, x/m[, imm]) or (y, y, y/m[, imm])
	{
		const struct Tbl {
			uint8_t code; // opcode byte, printed as 0x%02X
			const char *name; // mnemonic without the leading 'v'
			uint64_t type; // T_* flags; turned into source text by type2String() for the AVX form
			bool hasIMM; // generated functions take a trailing imm argument
			bool enableOmit; // generate the 2-operand shortcut when omitOnly is set
			int mode; // 1 : sse, 2 : avx, 3 : sse + avx
		} tbl[] = {
			{ 0x0D, "blendpd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
			{ 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
			{ 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 },
			{ 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
			{ 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
			{ 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
			{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
			{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
			{ 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 },
			{ 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0 | T_YMM | T_EVEX, true, true, 3 },
			{ 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
			{ 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },

			{ 0x47, "psllvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
			{ 0x47, "psllvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
			{ 0x46, "psravd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
			{ 0x45, "psrlvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
			{ 0x45, "psrlvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },

			{ 0xC2, "cmppd", T_0F | T_66 | T_YMM, true, true, 2 },
			{ 0xC2, "cmpps", T_0F | T_YMM, true, true, 2 },
			{ 0xC2, "cmpsd", T_0F | T_F2, true, true, 2 },
			{ 0xC2, "cmpss", T_0F | T_F3, true, true, 2 },
			{ 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 },
			{ 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 },
			{ 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, true, true, 2 },
			{ 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
			{ 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },

			{ 0xFC, "paddb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xFD, "paddw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xFE, "paddd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
			{ 0xD4, "paddq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },

			{ 0xEC, "paddsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xED, "paddsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },

			{ 0xDC, "paddusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xDD, "paddusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },

			{ 0x0F, "palignr", T_0F3A | T_66 | T_YMM | T_EVEX, true, true, 2 },

			{ 0xDB, "pand", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0xDF, "pandn", T_0F | T_66 | T_YMM, false, true, 2 },

			{ 0xE0, "pavgb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xE3, "pavgw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },

			{ 0x74, "pcmpeqb", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0x75, "pcmpeqw", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0x76, "pcmpeqd", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0x29, "pcmpeqq", T_0F38 | T_66 | T_YMM, false, true, 3 },

			{ 0x64, "pcmpgtb", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0x65, "pcmpgtw", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0x66, "pcmpgtd", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0x37, "pcmpgtq", T_0F38 | T_66 | T_YMM, false, true, 3 },

			{ 0x01, "phaddw", T_0F38 | T_66 | T_YMM, false, true, 2 },
			{ 0x02, "phaddd", T_0F38 | T_66 | T_YMM, false, true, 2 },
			{ 0x03, "phaddsw", T_0F38 | T_66 | T_YMM, false, true, 2 },

			{ 0x05, "phsubw", T_0F38 | T_66 | T_YMM, false, true, 2 },
			{ 0x06, "phsubd", T_0F38 | T_66 | T_YMM, false, true, 2 },
			{ 0x07, "phsubsw", T_0F38 | T_66 | T_YMM, false, true, 2 },
			{ 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 },

			{ 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
			{ 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },

			{ 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
			{ 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },

			{ 0x38, "pminsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
			{ 0xEA, "pminsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x39, "pminsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },

			{ 0xDA, "pminub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
			{ 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },

			{ 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },

			{ 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
			{ 0x28, "pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 3 },

			{ 0xEB, "por", T_0F | T_66 | T_YMM, false, true, 2 },
			{ 0xF6, "psadbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },

			{ 0x00, "pshufb", T_0F38 | T_66 | T_YMM | T_EVEX, false, false, 2 },

			{ 0x08, "psignb", T_0F38 | T_66 | T_YMM, false, true, 2 },
			{ 0x09, "psignw", T_0F38 | T_66 | T_YMM, false, true, 2 },
			{ 0x0A, "psignd", T_0F38 | T_66 | T_YMM, false, true, 2 },

			{ 0xF1, "psllw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 },
			{ 0xF2, "pslld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 },
			{ 0xF3, "psllq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 },

			{ 0xE1, "psraw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 },
			{ 0xE2, "psrad", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 },
			{ 0xD1, "psrlw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 },
			{ 0xD2, "psrld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 },
			{ 0xD3, "psrlq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 },

			{ 0xF8, "psubb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xF9, "psubw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xFA, "psubd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
			{ 0xFB, "psubq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },

			{ 0xE8, "psubsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xE9, "psubsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },

			{ 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },

			{ 0x68, "punpckhbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x69, "punpckhwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x6A, "punpckhdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
			{ 0x6D, "punpckhqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },

			{ 0x60, "punpcklbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x61, "punpcklwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
			{ 0x62, "punpckldq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
			{ 0x6C, "punpcklqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },

			{ 0xEF, "pxor", T_0F | T_66 | T_YMM, false, true, 2 },

			{ 0x53, "rcpss", T_0F | T_F3, false, true, 2 },
			{ 0x52, "rsqrtss", T_0F | T_F3, false, true, 2 },

			{ 0xC6, "shufpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, true, 2 },
			{ 0xC6, "shufps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, true, true, 2 },

			{ 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X | T_N8, false, true, 2 },
			{ 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X | T_N4, false, true, 2 },

			{ 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
			{ 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },

			{ 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
			{ 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },

			{ 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
			{ 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
			{ 0xCF, "gf2p8mulb", T_66 | T_0F38 | T_W0 | T_EVEX | T_YMM | T_EW0 | T_SAE_Z, false, false, 3 },
			{ 0xDA, "sm3msg1", T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
			{ 0xDA, "sm3msg2", T_66 | T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
			{ 0xDE, "sm3rnds2", T_66 | T_0F3A | T_W0 | T_EVEX | T_EW0, true, false, 2 },
			{ 0xDA, "sm4key4", T_F3 | T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
			{ 0xDA, "sm4rnds4", T_F2 | T_0F38 | T_W0 | T_EVEX | T_EW0, false, false, 2 },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			// source text of the type flags; only the AVX form below prints it
			std::string s = type2String(p->type);
			if (omitOnly) {
				if (p->enableOmit) {
					// 2-operand shortcut: x is passed as both the first and second operand
					printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? ", imm" : "");
				}
			} else {
				if (p->mode & 1) {
					// NOTE: the SSE form does not use p->type; it hard-codes T_66 | T_0F3A (with imm)
					// or T_66 | T_0F38 (without imm), so rows with mode bit 0 must match that
					if (p->hasIMM) {
						printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x%02X, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }\n", p->name, p->code);
					} else {
						printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code);
					}
				}
				if (p->mode & 2) {
					// AVX form: 3 operands plus an optional uint8_t imm
					printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
					, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
				}
			}
		}
	}
}
 240  
 241  void putMemOp(const char *name, const char *type, uint8_t ext, uint8_t code, int bit, int fwait = false)
 242  {
 243  	printf("void %s(const Address& addr) { %sopMR(addr, Reg%d(%d), %s, 0x%02X); }\n", name, fwait ? "db(0x9B); " : "", bit, ext, type, code);
 244  }
 245  
 246  void putLoadSeg(const char *name, uint64_t type, uint8_t code)
 247  {
 248  	printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, %s, 0x%02X); }\n", name, type ? "T_0F" : "T_NONE", code);
 249  }
 250  
 251  void put()
 252  {
 253  	{
 254  		char buf[16];
 255  		unsigned int v = VERSION;
 256  		if (v & 0xF) {
 257  			snprintf(buf, sizeof(buf), "%d.%02X.%x", v >> 12, (v >> 4) & 0xFF, v & 0xF);
 258  		} else {
 259  			snprintf(buf, sizeof(buf), "%d.%02X", v >> 12, (v >> 4) & 0xFF);
 260  		}
 261  		printf("const char *getVersionString() const { return \"%s\"; }\n", buf);
 262  	}
 263  	const int B = 1 << 0;
 264  	const int W = 1 << 1;
 265  	const int D = 1 << 2;
 266  	const int Q = 1 << 3;
 267  	{
 268  		const struct Tbl {
 269  			uint8_t code;
 270  			const char *name;
 271  		} tbl[] = {
 272  			// MMX
 273  			{ 0x6B, "packssdw" },
 274  			{ 0x63, "packsswb" },
 275  			{ 0x67, "packuswb" },
 276  
 277  			{ 0xDB, "pand" },
 278  			{ 0xDF, "pandn" },
 279  
 280  			{ 0xF5, "pmaddwd" },
 281  			{ 0xE4, "pmulhuw" },
 282  			{ 0xE5, "pmulhw" },
 283  			{ 0xD5, "pmullw" },
 284  
 285  			{ 0xEB, "por" },
 286  
 287  			{ 0x68, "punpckhbw" },
 288  			{ 0x69, "punpckhwd" },
 289  			{ 0x6A, "punpckhdq" },
 290  
 291  			{ 0x60, "punpcklbw" },
 292  			{ 0x61, "punpcklwd" },
 293  			{ 0x62, "punpckldq" },
 294  
 295  			{ 0xEF, "pxor" },
 296  
 297  			// MMX2
 298  			{ 0xE0, "pavgb" },
 299  			{ 0xE3, "pavgw" },
 300  			{ 0xEE, "pmaxsw" },
 301  			{ 0xDE, "pmaxub" },
 302  			{ 0xEA, "pminsw" },
 303  			{ 0xDA, "pminub" },
 304  			{ 0xF6, "psadbw" },
 305  			//
 306  			{ 0xD4, "paddq" },
 307  			{ 0xF4, "pmuludq" },
 308  			{ 0xFB, "psubq" },
 309  		};
 310  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 311  			const Tbl *p = &tbl[i];
 312  			printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n"
 313  				, p->name, p->code);
 314  		}
 315  	}
 316  
 317  	{
 318  		const struct Tbl {
 319  			uint8_t code;
 320  			int mode;
 321  			const char *name;
 322  		} tbl[] = {
 323  			{ 0xFC, B|W|D, "padd" },
 324  			{ 0xEC, B|W , "padds" },
 325  			{ 0xDC, B|W , "paddus" },
 326  			{ 0x74, B|W|D, "pcmpeq" },
 327  			{ 0x64, B|W|D, "pcmpgt" },
 328  			{ 0xF0, W|D|Q, "psll" },
 329  			{ 0xE0, W|D , "psra" },
 330  			{ 0xD0, W|D|Q, "psrl" },
 331  			{ 0xF8, B|W|D, "psub" },
 332  			{ 0xE8, B|W , "psubs" },
 333  			{ 0xD8, B|W , "psubus" },
 334  		};
 335  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 336  			const Tbl *p = &tbl[i];
 337  			static const char modTbl[][4] = {
 338  				"b", "w", "d", "q"
 339  			};
 340  			for (int j = 0; j < 4; j++) {
 341  				// B(0), W(1), D(2), Q(3)
 342  				if (!(p->mode & (1 << j))) continue;
 343  				printf("void %s%s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n"
 344  					, p->name, modTbl[j]
 345  					, p->code | j
 346  				);
 347  			}
 348  		}
 349  	}
 350  
 351  	{
 352  		const struct Tbl {
 353  			uint8_t code;
 354  			int ext;
 355  			int mode;
 356  			const char *name;
 357  		} tbl[] = {
 358  			{ 0x70, 6, W|D|Q, "psll" },
 359  			{ 0x70, 4, W|D , "psra" },
 360  			{ 0x70, 2, W|D|Q, "psrl" },
 361  		};
 362  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 363  			const Tbl *p = &tbl[i];
 364  			static const char modTbl[][4] = {
 365  				"b", "w", "d", "q"
 366  			};
 367  			for (int j = 0; j < 4; j++) {
 368  				// B(0), W(1), D(2), Q(3)
 369  				if (!(p->mode & (1 << j))) continue;
 370  				printf("void %s%s(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x%02X, %d); }\n"
 371  					, p->name, modTbl[j]
 372  					, p->code | j
 373  					, p->ext
 374  				);
 375  			}
 376  		}
 377  		printf("void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 7);
 378  		printf("void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 3);
 379  	}
 380  
 381  	{
 382  		const struct Tbl {
 383  			const char *pref;
 384  			const char *name;
 385  		} tbl[] = {
 386  			{ "T_NONE", "pshufw" },
 387  			{ "T_F2", "pshuflw" },
 388  			{ "T_F3", "pshufhw" },
 389  			{ "T_66", "pshufd" },
 390  		};
 391  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 392  			const Tbl *p = &tbl[i];
 393  			printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, %s, imm8); }\n", p->name, p->pref);
 394  		}
 395  	}
 396  	{
 397  		const struct MmxTbl6 {
 398  			uint8_t code; // for (reg, reg/[mem])
 399  			uint8_t code2; // for ([mem], reg)
 400  			const char *pref;
 401  			const char *name;
 402  		} mmxTbl6[] = {
 403  			{ 0x6F, 0x7F, "T_66", "movdqa" },
 404  			{ 0x6F, 0x7F, "T_F3", "movdqu" },
 405  			// SSE2
 406  			{ 0x28, 0x29, "T_NONE", "movaps" },
 407  			{ 0x10, 0x11, "T_F3", "movss" },
 408  			{ 0x10, 0x11, "T_NONE", "movups" },
 409  			{ 0x28, 0x29, "T_66", "movapd" },
 410  			{ 0x10, 0x11, "T_F2", "movsd" },
 411  			{ 0x10, 0x11, "T_66", "movupd" },
 412  		};
 413  		for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) {
 414  			const MmxTbl6 *p = &mmxTbl6[i];
 415  			printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, T_0F, %s); }\n", p->name, p->code, p->pref);
 416  			printf("void %s(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
 417  		}
 418  	}
 419  	{
 420  		enum {
 421  			PS = 1 << 0,
 422  			SS = 1 << 1,
 423  			PD = 1 << 2,
 424  			SD = 1 << 3
 425  		};
 426  		const struct {
 427  			const char *type;
 428  			const char *name;
 429  		} sufTbl[] = {
 430  			{ "T_0F", "ps" },
 431  			{ "T_0F | T_F3", "ss" },
 432  			{ "T_0F | T_66", "pd" },
 433  			{ "T_0F | T_F2", "sd" },
 434  		};
 435  		const struct Tbl {
 436  			uint8_t code;
 437  			int mode;
 438  			const char *name;
 439  			bool hasImm;
 440  		} tbl[] = {
 441  			{ 0x58, PS|SS|PD|SD, "add" },
 442  			{ 0x55, PS|PD     , "andn" },
 443  			{ 0x54, PS|PD     , "and" },
 444  			{ 0xC2, PS|SS|PD|SD, "cmp", true },
 445  			{ 0x5E, PS|SS|PD|SD, "div" },
 446  			{ 0x5F, PS|SS|PD|SD, "max" },
 447  			{ 0x5D, PS|SS|PD|SD, "min" },
 448  			{ 0x59, PS|SS|PD|SD, "mul" },
 449  			{ 0x56, PS|PD     , "or" },
 450  			{ 0x53, PS|SS     , "rcp" },
 451  			{ 0x52, PS|SS     , "rsqrt" },
 452  			{ 0xC6, PS|PD     , "shuf", true },
 453  			{ 0x51, PS|SS|PD|SD, "sqrt" },
 454  			{ 0x5C, PS|SS|PD|SD, "sub" },
 455  			{ 0x15, PS|PD     , "unpckh" },
 456  			{ 0x14, PS|PD     , "unpckl" },
 457  			{ 0x57, PS|PD     , "xor" },
 458  			//
 459  		};
 460  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 461  			const Tbl *p = &tbl[i];
 462  			for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
 463  				if (!(p->mode & (1 << j))) continue;
 464  				if (p->hasImm) {
 465  					// don't change uint8_t to int because NO is not in byte
 466  					printf("void %s%s(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, sufTbl[j].type, p->code);
 467  				} else {
 468  					printf("void %s%s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, sufTbl[j].type, p->code);
 469  				}
 470  			}
 471  		}
 472  	}
 473  	{
 474  		// (XMM, XMM)
 475  		const struct Tbl {
 476  			uint64_t type;
 477  			uint8_t code;
 478  			const char *name;
 479  		} tbl[] = {
 480  			{ T_66 | T_0F, 0xF7, "maskmovdqu" },
 481  			{ T_0F, 0x12, "movhlps" },
 482  			{ T_0F, 0x16, "movlhps" },
 483  		};
 484  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 485  			const Tbl *p = &tbl[i];
 486  			std::string s = type2String(p->type);
 487  			printf("void %s(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
 488  		}
 489  	}
 490  	{
 491  		// (XMM, XMM|MEM)
 492  		const struct Tbl {
 493  			uint8_t code;
 494  			uint64_t type;
 495  			const char *name;
 496  		} tbl[] = {
 497  			{ 0x6D, T_66, "punpckhqdq" },
 498  			{ 0x6C, T_66, "punpcklqdq" },
 499  
 500  			{ 0x2F, T_NONE, "comiss" },
 501  			{ 0x2E, T_NONE, "ucomiss" },
 502  			{ 0x2F, T_66, "comisd" },
 503  			{ 0x2E, T_66, "ucomisd" },
 504  
 505  			{ 0x5A, T_66, "cvtpd2ps" },
 506  			{ 0x5A, T_NONE, "cvtps2pd" },
 507  			{ 0x5A, T_F2, "cvtsd2ss" },
 508  			{ 0x5A, T_F3, "cvtss2sd" },
 509  			{ 0xE6, T_F2, "cvtpd2dq" },
 510  			{ 0xE6, T_66, "cvttpd2dq" },
 511  			{ 0xE6, T_F3, "cvtdq2pd" },
 512  			{ 0x5B, T_66, "cvtps2dq" },
 513  			{ 0x5B, T_F3, "cvttps2dq" },
 514  			{ 0x5B, T_NONE, "cvtdq2ps" },
 515  		};
 516  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 517  			const Tbl *p = &tbl[i];
 518  			std::string s = type2String(p->type | T_0F);
 519  			printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, s.c_str(), p->code);
 520  		}
 521  	}
 522  	{
 523  		// special type
 524  		const struct Tbl {
 525  			uint8_t code;
 526  			uint64_t type;
 527  			const char *name;
 528  			const char *cond;
 529  		} tbl[] = {
 530  			{ 0x2A, T_NONE , "cvtpi2ps",  "isXMM_MMXorMEM" },
 531  			{ 0x2D, T_NONE , "cvtps2pi",  "isMMX_XMMorMEM" },
 532  			{ 0x2A, T_F3, "cvtsi2ss",  "isXMM_REG32orMEM" },
 533  			{ 0x2D, T_F3, "cvtss2si",  "isREG32_XMMorMEM" },
 534  			{ 0x2C, T_NONE , "cvttps2pi", "isMMX_XMMorMEM" },
 535  			{ 0x2C, T_F3, "cvttss2si", "isREG32_XMMorMEM" },
 536  			{ 0x2A, T_66, "cvtpi2pd",  "isXMM_MMXorMEM" },
 537  			{ 0x2D, T_66, "cvtpd2pi",  "isMMX_XMMorMEM" },
 538  			{ 0x2A, T_F2, "cvtsi2sd",  "isXMM_REG32orMEM" },
 539  			{ 0x2D, T_F2, "cvtsd2si",  "isREG32_XMMorMEM" },
 540  			{ 0x2C, T_66, "cvttpd2pi", "isMMX_XMMorMEM" },
 541  			{ 0x2C, T_F2, "cvttsd2si", "isREG32_XMMorMEM" },
 542  		};
 543  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 544  			const Tbl *p = &tbl[i];
 545  			std::string s = type2String(p->type | T_0F);
 546  			printf("void %s(const Reg& reg, const Operand& op) { opSSE(reg, op, %s, 0x%02X, %s); }\n", p->name, s.c_str(), p->code, p->cond);
 547  		}
 548  	}
 549  	{
 550  		// prefetch
 551  		const struct Tbl {
 552  			int ext;
 553  			const char *name;
 554  			int code;
 555  		} tbl[] = {
 556  			{ 1, "t0", 0x18},
 557  			{ 2, "t1", 0x18},
 558  			{ 3, "t2", 0x18},
 559  			{ 0, "nta", 0x18},
 560  			{ 2, "wt1", 0x0D},
 561  			{ 1, "w", 0x0D},
 562  			{ 7, "it0", 0x18},
 563  			{ 6, "it1", 0x18},
 564  		};
 565  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 566  			const Tbl *p = &tbl[i];
 567  			printf("void prefetch%s(const Address& addr) { opMR(addr, Reg32(%d), T_0F, 0x%02X); }\n", p->name, p->ext, p->code);
 568  		}
 569  	}
 570  	{
 571  		const struct Tbl {
 572  			uint8_t code;
 573  			const char *name;
 574  			uint64_t type;
 575  		} tbl[] = {
 576  			{ 0x16, "movhps", T_0F },
 577  			{ 0x12, "movlps", T_0F },
 578  			{ 0x16, "movhpd", T_0F | T_66 },
 579  			{ 0x12, "movlpd", T_0F | T_66 },
 580  		};
 581  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 582  			const Tbl *p = &tbl[i];
 583  			std::string s = type2String(p->type);
 584  			printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
 585  		}
 586  	}
 587  	{
 588  		// cmov
 589  		const struct Tbl {
 590  			uint8_t ext;
 591  			const char *name;
 592  		} tbl[] = {
 593  			{ 0, "o" },
 594  			{ 1, "no" },
 595  			{ 2, "b" },
 596  			{ 2, "c" },
 597  			{ 2, "nae" },
 598  			{ 3, "nb" },
 599  			{ 3, "ae" },
 600  			{ 3, "nc" },
 601  			{ 4, "e" },
 602  			{ 4, "z" },
 603  			{ 5, "ne" },
 604  			{ 5, "nz" },
 605  			{ 6, "be" },
 606  			{ 6, "na" },
 607  			{ 7, "nbe" },
 608  			{ 7, "a" },
 609  			{ 8, "s" },
 610  			{ 9, "ns" },
 611  			{ 10, "p" },
 612  			{ 10, "pe" },
 613  			{ 11, "np" },
 614  			{ 11, "po" },
 615  			{ 12, "l" },
 616  			{ 12, "nge" },
 617  			{ 13, "nl" },
 618  			{ 13, "ge" },
 619  			{ 14, "le" },
 620  			{ 14, "ng" },
 621  			{ 15, "nle" },
 622  			{ 15, "g" },
 623  		};
 624  		const char *msg = "//-V524"; // disable warning of PVS-Studio
 625  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 626  			const Tbl *p = &tbl[i];
 627  			printf("void cmov%s(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | %d, op.isREG(16|i32e)); }%s\n", p->name, p->ext, msg);
 628  			printf("void cmov%s(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | %d); }%s\n", p->name, p->ext, msg);
 629  			printf("void j%s(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
 630  			printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
 631  			printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg);
 632  			printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
 633  			printf("void set%s(const Operand& op) { opSetCC(op, %d); }%s\n", p->name, p->ext, msg);
 634  
 635  			// ccmpscc
 636  			// true if SCC = 0b1010, false if SCC = 0b1011 (see APX Architecture Specification p.266)
 637  			const char *s = p->ext == 10 ? "t" : p->ext == 11 ? "f" : p->name;
 638  			printf("void ccmp%s(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, %d); }\n", s, p->ext);
 639  			printf("void ccmp%s(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, %d); }\n", s, p->ext);
 640  			printf("void ctest%s(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, %d); }\n", s, p->ext);
 641  			printf("void ctest%s(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, %d); }\n", s, p->ext);
 642  		}
 643  	}
 644  	{
 645  		// cfcmov
 646  		const struct Tbl {
 647  			uint8_t code;
 648  			const char *suf;
 649  		} tbl[] = {
 650  			{0x40, "o"},
 651  			{0x41, "no"},
 652  			{0x42, "b"},
 653  			{0x43, "nb"},
 654  			{0x44, "z"},
 655  			{0x45, "nz"},
 656  			{0x46, "be"},
 657  			{0x47, "nbe"},
 658  			{0x48, "s"},
 659  			{0x49, "ns"},
 660  			{0x4A, "p"},
 661  			{0x4B, "np"},
 662  			{0x4C, "l"},
 663  			{0x4D, "nl"},
 664  			{0x4E, "le"},
 665  			{0x4F, "nle"},
 666  		};
 667  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 668  			const Tbl *p = &tbl[i];
 669  			printf("void cfcmov%s(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x%02X); }\n", p->suf, p->code);
 670  			printf("void cfcmov%s(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x%02X); }\n", p->suf, p->code);
 671  		}
 672  	}
 673  	{
 674  		const struct Tbl {
 675  			const char *name;
 676  			uint8_t code;
 677  		} tbl[] = {
 678  			{ "loop", 0xE2 },
 679  			{ "loope", 0xE1 },
 680  			{ "loopne", 0xE0 },
 681  		};
 682  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 683  			const Tbl *p = &tbl[i];
 684  			printf("void %s(std::string label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code);
 685  			printf("void %s(const Label& label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code);
 686  			printf("void %s(const char *label) { %s(std::string(label)); }\n", p->name, p->name);
 687  		}
 688  	}
 689  	////////////////////////////////////////////////////////////////
 690  	{
 691  		const GenericTbl tbl[] = {
 692  			{ "bnd", 0xf2 }, /* 0xf2 prefix for MPX */
 693  			{ "cbw", 0x66, 0x98 },
 694  			{ "cdq", 0x99 },
 695  			{ "clc", 0xF8 },
 696  			{ "cld", 0xFC },
 697  			{ "cli", 0xFA },
 698  			{ "cmc", 0xF5 },
 699  
 700  			{ "cpuid", 0x0F, 0xA2 },
 701  			{ "cwd", 0x66, 0x99 },
 702  			{ "cwde", 0x98 },
 703  			{ "cmpsb", 0xA6 },
 704  			{ "cmpsw", 0x66, 0xA7 },
 705  			{ "cmpsd", 0xA7 },
 706  			{ "endbr32", 0xF3, 0x0F, 0x1E, 0xFB },
 707  			{ "endbr64", 0xF3, 0x0F, 0x1E, 0xFA },
 708  			{ "hlt", 0xF4 },
 709  			{ "int3", 0xCC },
 710  			{ "scasb", 0xAE },
 711  			{ "scasw", 0x66, 0xAF },
 712  			{ "scasd", 0xAF },
 713  			{ "movsb", 0xA4 },
 714  			{ "leave", 0xC9 },
 715  			{ "lodsb", 0xAC },
 716  			{ "lodsw", 0x66, 0xAD },
 717  			{ "lodsd", 0xAD },
 718  			{ "movsw", 0x66, 0xA5 },
 719  			{ "movsd", 0xA5 },
 720  			{ "outsb", 0x6E },
 721  			{ "outsw", 0x66, 0x6F },
 722  			{ "outsd", 0x6F },
 723  			{ "stosb", 0xAA },
 724  			{ "stosw", 0x66, 0xAB },
 725  			{ "stosd", 0xAB },
 726  			{ "rep", 0xF3 },
 727  			{ "repe", 0xF3 },
 728  			{ "repz", 0xF3 },
 729  			{ "repne", 0xF2 },
 730  			{ "repnz", 0xF2 },
 731  
 732  			{ "lahf", 0x9F },
 733  			{ "lock", 0xF0 },
 734  
 735  			{ "sahf", 0x9E },
 736  			{ "serialize", 0x0F, 0x01, 0xE8 },
 737  			{ "stc", 0xF9 },
 738  			{ "std", 0xFD },
 739  			{ "sti", 0xFB },
 740  			{ "sysenter", 0x0F, 0x34 },
 741  			{ "sysexit", 0x0F, 0x35 },
 742  
 743  			{ "emms", 0x0F, 0x77 },
 744  			{ "pause", 0xF3, 0x90 },
 745  			{ "sfence", 0x0F, 0xAE, 0xF8 },
 746  			{ "lfence", 0x0F, 0xAE, 0xE8 },
 747  			{ "mfence", 0x0F, 0xAE, 0xF0 },
 748  			{ "monitor", 0x0F, 0x01, 0xC8 },
 749  			{ "mwait", 0x0F, 0x01, 0xC9 },
 750  
 751  			{ "rdmsr", 0x0F, 0x32 },
 752  			{ "rdpmc", 0x0F, 0x33 },
 753  			{ "rdtsc", 0x0F, 0x31 },
 754  			{ "rdtscp", 0x0F, 0x01, 0xF9 },
 755  			{ "ud2", 0x0F, 0x0B },
 756  			{ "wait", 0x9B },
 757  			{ "fwait", 0x9B },
 758  			{ "wbinvd", 0x0F, 0x09 },
 759  			{ "wrmsr", 0x0F, 0x30 },
 760  			{ "xlatb", 0xD7 },
 761  			{ "xend", 0x0f, 0x01, 0xd5 },
 762  
 763  			{ "popf", 0x9D },
 764  			{ "pushf", 0x9C },
 765  			{ "stac", 0x0F, 0x01, 0xCB },
 766  
 767  			{ "vzeroall", 0xC5, 0xFC, 0x77 },
 768  			{ "vzeroupper", 0xC5, 0xF8, 0x77 },
 769  			{ "xgetbv", 0x0F, 0x01, 0xD0 },
 770  
 771  			// FPU
 772  			{ "f2xm1", 0xD9, 0xF0 },
 773  			{ "fabs", 0xD9, 0xE1 },
 774  			{ "faddp", 0xDE, 0xC1 },
 775  			{ "fchs", 0xD9, 0xE0 },
 776  			{ "fclex", 0x9B, 0xDB, 0xE2 },
 777  			{ "fnclex", 0xDB, 0xE2 },
 778  			{ "fcom", 0xD8, 0xD1 },
 779  			{ "fcomp", 0xD8, 0xD9 },
 780  			{ "fcompp", 0xDE, 0xD9 },
 781  			{ "fcos", 0xD9, 0xFF },
 782  			{ "fdecstp", 0xD9, 0xF6 },
 783  			{ "fdivp", 0xDE, 0xF9 },
 784  			{ "fdivrp", 0xDE, 0xF1 },
 785  			{ "fincstp", 0xD9, 0xF7 },
 786  			{ "finit", 0x9B, 0xDB, 0xE3 },
 787  			{ "fninit", 0xDB, 0xE3 },
 788  			{ "fld1", 0xD9, 0xE8 },
 789  			{ "fldl2t", 0xD9, 0xE9 },
 790  			{ "fldl2e", 0xD9, 0xEA },
 791  			{ "fldpi", 0xD9, 0xEB },
 792  			{ "fldlg2", 0xD9, 0xEC },
 793  			{ "fldln2", 0xD9, 0xED },
 794  			{ "fldz", 0xD9, 0xEE },
 795  			{ "fmulp", 0xDE, 0xC9 },
 796  			{ "fnop", 0xD9, 0xD0 },
 797  			{ "fpatan", 0xD9, 0xF3 },
 798  			{ "fprem", 0xD9, 0xF8 },
 799  			{ "fprem1", 0xD9, 0xF5 },
 800  			{ "fptan", 0xD9, 0xF2 },
 801  			{ "frndint", 0xD9, 0xFC },
 802  			{ "fscale", 0xD9, 0xFD },
 803  			{ "fsin", 0xD9, 0xFE },
 804  			{ "fsincos", 0xD9, 0xFB },
 805  			{ "fsqrt", 0xD9, 0xFA },
 806  			{ "fsubp", 0xDE, 0xE9 },
 807  			{ "fsubrp", 0xDE, 0xE1 },
 808  			{ "ftst", 0xD9, 0xE4 },
 809  			{ "fucom", 0xDD, 0xE1 },
 810  			{ "fucomp", 0xDD, 0xE9 },
 811  			{ "fucompp", 0xDA, 0xE9 },
 812  			{ "fxam", 0xD9, 0xE5 },
 813  			{ "fxch", 0xD9, 0xC9 },
 814  			{ "fxtract", 0xD9, 0xF4 },
 815  			{ "fyl2x", 0xD9, 0xF1 },
 816  			{ "fyl2xp1", 0xD9, 0xF9 },
 817  
 818  			// AMD Zen
 819  			{ "monitorx", 0x0F, 0x01, 0xFA },
 820  			{ "mwaitx", 0x0F, 0x01, 0xFB },
 821  			{ "clzero", 0x0F, 0x01, 0xFC },
 822  		};
 823  		putGeneric(tbl, NUM_OF_ARRAY(tbl));
 824  		puts("void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }");
 825  		puts("void int_(uint8_t x) { db(0xCD); db(x); }");
 826  		putLoadSeg("lss", T_0F, 0xB2);
 827  		putLoadSeg("lfs", T_0F, 0xB4);
 828  		putLoadSeg("lgs", T_0F, 0xB5);
 829  	}
 830  	{ // Basic ALU instructions: print the (op, op) and (op, imm) overloads, plus destination-first three-operand overloads when support3op is set
 831  		const struct Tbl {
 832  			uint8_t code; // (reg, reg)
 833  			uint8_t ext; // (reg, imm)
 834  			const char *name; // name of the generated function
 835  			bool support3op; // also print the opROO/opROI overloads that take a destination Reg
 836  			uint64_t type; // T_* flags; rendered by type2String and used only in the three-operand overloads
 837  		} tbl[] = {
 838  			{ 0x10, 2, "adc", true, T_NONE },
 839  			{ 0x00, 0, "add", true, T_NF | T_CODE1_IF1 },
 840  			{ 0x20, 4, "and_", true, T_NF | T_CODE1_IF1 },
 841  			{ 0x38, 7, "cmp", false, T_NONE },
 842  			{ 0x08, 1, "or_", true, T_NF | T_CODE1_IF1 },
 843  			{ 0x18, 3, "sbb", true, T_NONE },
 844  			{ 0x28, 5, "sub", true, T_NF | T_CODE1_IF1 },
 845  			{ 0x30, 6, "xor_", true, T_NF | T_CODE1_IF1 },
 846  		};
 847  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 848  			const Tbl *p = &tbl[i];
 849  			printf("void %s(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x%02X); }\n", p->name, p->code);
 850  			printf("void %s(const Operand& op, uint32_t imm) { opOI(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
 851  			if (!p->support3op) continue; // in this table only cmp has no three-operand form
 852  			std::string s = type2String(p->type);
 853  			printf("void %s(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
 854  			printf("void %s(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, %s, %d); }\n", p->name, s.c_str(), p->ext);
 855  		}
 856  	}
 857  	{ // aadd/aand/aor/axor: print (Address, Reg32e) functions that call opMR with the fixed opcode 0x0FC
 858  		const struct Tbl {
 859  			const char *name; // name of the generated function
 860  			const char *prefix; // text appended verbatim to both the T_0F38 and the T_APX type expressions ("" for none)
 861  		} tbl[] = {
 862  			{ "aadd", "" },
 863  			{ "aand", "|T_66" },
 864  			{ "aor", "|T_F2" },
 865  			{ "axor", "|T_F3" },
 866  		};
 867  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 868  			const Tbl *p = &tbl[i];
 869  			printf("void %s(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38%s, 0x0FC, T_APX%s); }\n", p->name, p->prefix, p->prefix);
 870  		}
 871  	}
 872  
 873  	{ // inc/dec: print the one-operand overload (passes a default-constructed Reg()) and the destination+operand overload; both forward to opIncDec
 874  		const struct Tbl {
 875  			uint8_t ext; // printed as the last argument of opIncDec
 876  			const char *name; // name of the generated function
 877  		} tbl[] = {
 878  			{ 1, "dec" },
 879  			{ 0, "inc" },
 880  		};
 881  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 882  			const Tbl *p = &tbl[i];
 883  			printf("void %s(const Operand& op) { opIncDec(Reg(), op, %d); }\n", p->name, p->ext);
 884  			printf("void %s(const Reg& d, const Operand& op) { opIncDec(d, op, %d); }\n", p->name, p->ext);
 885  		}
 886  	}
 887  	{ // bt/bts/btr/btc: print the (op, reg) overload via opRO and the (op, imm8) overload via opRext followed by db(imm)
 888  		const struct Tbl {
 889  			uint8_t code; // opcode printed into the (op, reg) overload
 890  			uint8_t ext; // printed as opRext's third argument in the (op, imm) overload, which always uses opcode 0xba
 891  			const char *name; // name of the generated function
 892  		} tbl[] = {
 893  			{ 0xa3, 4, "bt" },
 894  			{ 0xab, 5, "bts" },
 895  			{ 0xb3, 6, "btr" },
 896  			{ 0xbb, 7, "btc" },
 897  		};
 898  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 899  			const Tbl *p = &tbl[i];
 900  			printf("void %s(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0x%02X, op.isREG(16|i32e) && op.getBit() == reg.getBit()); }\n", p->name, p->code);
 901  			printf("void %s(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, %d, T_0F, 0xba, false, 1); db(imm); }\n", p->name, p->ext);
 902  		}
 903  	}
 904  	{ // div/idiv/imul/mul/neg/not_: print the one-operand overload for every entry and a (d, op) overload for entries with n == 2
 905  		const struct Tbl {
 906  			uint8_t code; // opcode printed into both generated overloads
 907  			uint8_t ext; // printed as opRext's third argument, and as the index of the Reg built in the (d, op) overload
 908  			const char *name; // name of the generated function
 909  			bool NF; // add T_NF to the generated type flags
 910  			int n; // # of op
 911  		} tbl[] = {
 912  			{ 0xF6, 6, "div", true, 1 },
 913  			{ 0xF6, 7, "idiv", true, 1 },
 914  			{ 0xF6, 5, "imul", true ,3 },
 915  			{ 0xF6, 4, "mul", true, 1 },
 916  			{ 0xF6, 3, "neg", true, 2 },
 917  			{ 0xF6, 2, "not_", false, 2 },
 918  		};
 919  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 920  			const Tbl *p = &tbl[i];
 922  			uint64_t type = T_APX|T_CODE1_IF1;
 923  			if (p->NF) type |= T_NF;
 924  			std::string s = type2String(type);
 925  			printf("void %s(const Operand& op) { opRext(op, 0, %d, %s, 0x%02X); }\n", p->name, p->ext, s.c_str(), p->code);
 926  			if (p->n == 2) { // only n == 2 gets a destination-register overload here; imul's (reg, op) form is printed after the loop
 927  				type = T_APX|T_ND1|T_CODE1_IF1;
 928  				if (p->NF) type |= T_NF;
 929  				s = type2String(type);
 930  				printf("void %s(const Reg& d, const Operand& op) { opROO(d, op, Reg(%d, Operand::REG, d.getBit()), %s, 0x%02X); }\n", p->name, p->ext, s.c_str(), p->code); // opcode comes from the table, like the one-operand form above
 931  			}
 932  		}
 933  		printf("void imul(const Reg& reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xAF)) return; opRO(reg, op, T_0F, 0xAF, reg.getKind() == op.getKind()); }\n");
 934  	}
 935  	{ // Shift/rotate instructions: for each name print four overloads (imm count, Reg8 count, and each of those with a leading destination Reg passed as &d)
 936  		const struct Tbl {
 937  			const char *name; // name of the generated function
 938  			uint8_t ext; // |8 means supporting NF=1
 939  		} tbl[] = {
 940  			{ "rcl", 2|0 },
 941  			{ "rcr", 3|0 },
 942  			{ "rol", 0|8 },
 943  			{ "ror", 1|8 },
 944  			{ "sar", 7|8 },
 945  			{ "shl", 4|8 },
 946  			{ "shr", 5|8 },
 947  
 948  			{ "sal", 4|8 }, // same ext value as shl
 949  		};
 950  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 951  			const Tbl *p = &tbl[i];
 952  			printf("void %s(const Operand& op, int imm) { opShift(op, imm, %d); }\n", p->name, p->ext);
 953  			printf("void %s(const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d); }\n", p->name, p->ext);
 954  			printf("void %s(const Reg& d, const Operand& op, int imm) { opShift(op, imm, %d, &d); }\n", p->name, p->ext);
 955  			printf("void %s(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d, &d); }\n", p->name, p->ext);
 956  		}
 957  	}
 958  	{ // shld/shrd: print imm8-count and Reg8-count overloads, each with and without a leading destination Reg; all forward to opShxd
 959  		const struct Tbl {
 960  			const char *name; // name of the generated function
 961  			uint8_t code; // printed as the first opcode argument of opShxd
 962  			uint8_t code2; // printed as the second opcode argument of opShxd
 963  		} tbl[] = {
 964  			{ "shld", 0xA4, 0x24 },
 965  			{ "shrd", 0xAC, 0x2C },
 966  		};
 967  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 968  			const Tbl *p = &tbl[i];
 969  			printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0x%02X, 0x%02X); }\n", p->name, p->code, p->code2);
 970  			printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0x%02X, 0x%02X, &_cl); }\n", p->name, p->code, p->code2); // Reg8 forms pass 0 as the immediate and &_cl as the trailing argument
 971  			printf("void %s(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm) { opShxd(d, op, reg, imm, 0x%02X, 0x%02X); }\n", p->name, p->code, p->code2);
 972  			printf("void %s(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0x%02X, 0x%02X, &_cl); }\n", p->name, p->code, p->code2);
 973  		}
 974  	}
 975  	{ // bsf/bsr: print (Reg, Operand) functions that call opRO with T_0F and the table opcode
 976  		const struct Tbl {
 977  			const char *name; // name of the generated function
 978  			uint8_t code; // opcode printed into the opRO call
 979  		} tbl[] = {
 980  			{ "bsf", 0xBC },
 981  			{ "bsr", 0xBD },
 982  		};
 983  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 984  			const Tbl *p = &tbl[i];
 985  			printf("void %s(const Reg&reg, const Operand& op) { opRO(reg, op, T_0F, 0x%02X, op.isREG(16|i32e)); }\n", p->name, p->code);
 986  		}
 987  	}
 988  	{ // popcnt/tzcnt/lzcnt: print (Reg, Operand) functions that call opCnt; entries with a non-zero code2 first try opROO with T_APX|T_NF
 989  		const struct Tbl {
 990  			const char *name; // name of the generated function
 991  			uint8_t code; // opcode printed into the opCnt call
 992  			uint8_t code2; // opcode printed into the opROO attempt; 0 means no opROO attempt is generated
 993  		} tbl[] = {
 994  			{ "popcnt", 0xB8, 0 },
 995  			{ "tzcnt", 0xBC, 0xF4 },
 996  			{ "lzcnt", 0xBD, 0xF5 },
 997  		};
 998  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
 999  			const Tbl *p = &tbl[i];
1000  			if (p->code2) {
1001  				printf("void %s(const Reg&reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0x%02X)) return; opCnt(reg, op, 0x%02X); }\n", p->name, p->code2, p->code);
1002  			} else {
1003  				printf("void %s(const Reg&reg, const Operand& op) { opCnt(reg, op, 0x%02X); }\n", p->name, p->code);
1004  			}
1005  		}
1006  	}
1007  	// SSSE3
1008  	{ // print (Mmx, Operand) functions that call opMMX with T_0F38 and T_66, then the hand-written palignr which takes an extra immediate
1009  		const struct Tbl {
1010  			uint8_t code; // opcode printed into the opMMX call
1011  			const char *name; // name of the generated function
1012  		} tbl[] = {
1013  			{ 0x00, "pshufb" },
1014  			{ 0x01, "phaddw" },
1015  			{ 0x02, "phaddd" },
1016  			{ 0x03, "phaddsw" },
1017  			{ 0x04, "pmaddubsw" },
1018  			{ 0x05, "phsubw" },
1019  			{ 0x06, "phsubd" },
1020  			{ 0x07, "phsubsw" },
1021  			{ 0x08, "psignb" },
1022  			{ 0x09, "psignw" },
1023  			{ 0x0a, "psignd" },
1024  			{ 0x0b, "pmulhrsw" },
1025  			{ 0x1c, "pabsb" },
1026  			{ 0x1d, "pabsw" },
1027  			{ 0x1e, "pabsd" },
1028  		};
1029  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1030  			const Tbl *p = &tbl[i];
1031  			printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, T_0F38, T_66); }\n", p->name, p->code);
1032  		}
1033  		printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0F, T_0F3A, T_66, static_cast<uint8_t>(imm)); }\n"); // palignr uses T_0F3A and passes imm narrowed to uint8_t
1034  	}
1035  	{ // pclmul*qdq aliases: print wrappers that call pclmulqdq / vpclmulqdq with a fixed last argument taken from the table
1036  		const struct Tbl {
1037  			const char *name; // alias name; the three-operand wrapper is printed with a "v" prefix
1038  			uint8_t code; // value printed as the last argument of pclmulqdq / vpclmulqdq
1039  		} tbl[] = {
1040  			{ "pclmullqlqdq", 0 },
1041  			{ "pclmulhqlqdq", 1 },
1042  			{ "pclmullqhqdq", 0x10 },
1043  			{ "pclmulhqhqdq", 0x11 },
1044  		};
1045  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1046  			const Tbl *p = &tbl[i];
1047  			printf("void %s(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x%02X); }\n", p->name, p->code);
1048  			printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x%02X); }\n", p->name, p->code);
1049  		}
1050  	}
1051  	{ // Instructions handed to putMemOp (defined elsewhere in this file), followed by the hand-written Reg16 overloads of fstsw/fnstsw
1052  		const struct Tbl {
1053  			const char *type; // type expression passed to putMemOp as text ("0" for none)
1054  			uint8_t code; // passed to putMemOp after ext
1055  			uint8_t ext; // passed to putMemOp after type
1056  			const char *name; // passed to putMemOp as the first argument
1057  			bool fwait; // passed as putMemOp's last argument; set on fsave/fstcw/fstenv/fstsw, whose fn* twins share code and ext (NOTE(review): presumably requests a leading 0x9B byte - confirm in putMemOp)
1058  		} tbl[] = {
1059  			{ "T_0F", 0xAE, 2, "ldmxcsr", false },
1060  			{ "T_0F", 0xAE, 3, "stmxcsr", false },
1061  			{ "T_0F", 0xAE, 7, "clflush", false },
1062  			{ "T_66 | T_0F", 0xAE, 7, "clflushopt", false},
1063  			{ "0", 0xDF, 4, "fbld", false },
1064  			{ "0", 0xDF, 6, "fbstp", false },
1065  			{ "0", 0xD9, 5, "fldcw", false },
1066  			{ "0", 0xD9, 4, "fldenv", false },
1067  			{ "0", 0xDD, 4, "frstor", false },
1068  			{ "0", 0xDD, 6, "fsave", true  },
1069  			{ "0", 0xDD, 6, "fnsave", false },
1070  			{ "0", 0xD9, 7, "fstcw", true },
1071  			{ "0", 0xD9, 7, "fnstcw", false },
1072  			{ "0", 0xD9, 6, "fstenv", true },
1073  			{ "0", 0xD9, 6, "fnstenv", false },
1074  			{ "0", 0xDD, 7, "fstsw", true },
1075  			{ "0", 0xDD, 7, "fnstsw", false },
1076  			{ "T_0F", 0xAE, 1, "fxrstor", false },
1077  		};
1078  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1079  			const Tbl *p = &tbl[i];
1080  			putMemOp(p->name, p->type, p->ext, p->code, 32, p->fwait); // NOTE(review): meaning of the constant 32 is not visible here - confirm in putMemOp
1081  		}
1082  		puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }"); // register form: throws unless r is AX; differs from fnstsw below only by the leading db(0x9B)
1083  		puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }");
1084  	}
1085  	{ // movntpd/movntdq: print (Address, Xmm) functions that call opMR with T_0F and the table opcode
1086  		const struct Tbl {
1087  			uint8_t code; // opcode printed into the opMR call
1088  			const char *name; // name of the generated function
1089  		} tbl[] = {
1090  			{ 0x2B, "movntpd" },
1091  			{ 0xE7, "movntdq" },
1092  		};
1093  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1094  			const Tbl *p = &tbl[i];
1095  			// cast xmm register to 16bit register to put 0x66
1096  			printf("void %s(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x%02X); }\n", p->name, p->code);
1097  		}
1098  	}
1099  	{ // movsx/movzx: print (Reg, Operand) functions that forward to opMovxx with the table opcode
1100  		const struct Tbl {
1101  			uint8_t code; // opcode printed into the opMovxx call
1102  			const char *name; // name of the generated function
1103  		} tbl[] = {
1104  			{ 0xBE, "movsx" },
1105  			{ 0xB6, "movzx" },
1106  		};
1107  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1108  			const Tbl *p = &tbl[i];
1109  			printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code);
1110  		}
1111  	}
1112  	{ // adcx/adox: print the two-operand overload (tries opROO first, then falls back to opRO) and the three-operand overload with a destination register
1113  		const struct Tbl {
1114  			uint8_t prefix; // formatted with T_%02X, so 0x66 is printed as T_66 and 0xF3 as T_F3
1115  			const char *name; // name of the generated function
1116  		} tbl[] = {
1117  			{ 0x66, "adcx" },
1118  			{ 0xF3, "adox" },
1119  		};
1120  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1121  			const Tbl *p = &tbl[i];
1122  			printf("void %s(const Reg32e& reg, const Operand& op) { if (!reg.isREG(16|i32e) && reg.getBit() == op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (opROO(Reg(), op, reg, T_%02X, 0x66)) return; opRO(reg, op, T_%02X | T_0F38, 0xF6); }\n", p->name, p->prefix, p->prefix);
1123  			printf("void %s(const Reg32e& d, const Reg32e& reg, const Operand& op) { opROO(d, op, reg, T_%02X, 0x66); }\n", p->name, p->prefix);
1124  		}
1125  	}
1126  	{ // in/out: print the uint8_t and Reg overloads of in_ and out_; all four forward to opInOut
1127  		puts("void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }");
1128  		puts("void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }");
1129  		puts("void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }");
1130  		puts("void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }");
1131  	}
1132  	// mpx
1133  	{ // print the BoundsReg functions (bndcl/bndcu/bndcn/bndldx/bndmk/bndmov/bndstx); each puts() emits one complete definition
1134  		puts("void bndcl(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F3 | T_0F, 0x1A, !op.isMEM()); }");
1135  		puts("void bndcu(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1A, !op.isMEM()); }");
1136  		puts("void bndcn(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1B, !op.isMEM()); }");
1137  		puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, T_0F, 0x1A); }");
1138  		puts("void bndmk(const BoundsReg& bnd, const Address& addr) { opMR(addr, bnd, T_F3 | T_0F, 0x1B); }");
1139  		puts("void bndmov(const BoundsReg& bnd, const Operand& op) { opRO(bnd, op, T_66 | T_0F, 0x1A, op.isBNDREG()); }");
1140  		puts("void bndmov(const Address& addr, const BoundsReg& bnd) { opMR(addr, bnd, T_66 | T_0F, 0x1B); }");
1141  		puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, T_0F, 0x1B); }");
1142  	}
1143  	// misc
1144  	{ // One-off definitions that are written out by hand instead of being generated from a table; each puts() prints one complete function
1145  		puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opMR(addr, reg, 0, 0x8D); }");
1146  		puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }"); // a zero imm selects the form without an immediate
1147  		puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }");
1148  
1149  		puts("void xadd(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xC0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }"); // low opcode bit is 0 for 8-bit registers, 1 otherwise
1150  		puts("void cmpxchg(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }");
1151  		puts("void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); }");
1152  		puts("void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); }");
1153  		puts("void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); }");
1154  		puts("void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); }");
1155  		puts("void cmpxchg8b(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xC7); }");
1156  
1157  		puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }");
1158  		puts("void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }");
1159  		puts("void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }");
1160  		puts("void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }");
1161  		puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, op, T_0F | (mmx.isXMM() ? T_66 : T_NONE), 0xC4, 0, imm); }");
1162  		puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x21, isXMM_XMMorMEM, imm); }");
1163  		puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x20, isXMM_REG32orMEM, imm); }");
1164  		puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x22, isXMM_REG32orMEM, imm); }");
1165  
1166  		puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }");
1167  		puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }");
1168  		puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }");
1169  		puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }"); // emits a 0x66 byte, then reuses movmskps
1170  		puts("void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }");
1171  		puts("void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }");
1172  		puts("void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }");
1173  		puts("void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }");
1174  		puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }");
1175  
1176  		puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }"); // the movd overloads emit a 0x66 byte first when the Mmx argument is an XMM register
1177  		puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
1178  		puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }");
1179  		puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
1180  		puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }");
1181  		puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }");
1182  		puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }");
1183  		puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
1184  		puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(6, Operand::REG, r.getBit()), r, T_0F, 0xC7); }"); // rdrand and rdseed differ only in the index (6 vs 7) of the Reg built for opRR
1185  		puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(7, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
1186  		puts("void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 0 : 1); uint64_t type = op.isBit(16) ? T_66:0; if (opROO(Reg(), op, static_cast<const Reg&>(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); }");
1187  		puts("void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }"); // tpause/umonitor/umwait reject register indices above 7 and differ in the first byte (0x66 / 0xF3 / 0xF2)
1188  		puts("void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
1189  		puts("void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
1190  		puts("void clwb(const Address& addr) { opMR(addr, esi, T_66 | T_0F, 0xAE); }");
1191  		puts("void cldemote(const Address& addr) { opMR(addr, eax, T_0F, 0x1C); }");
1192  		puts("void xabort(uint8_t imm) { db(0xC6); db(0xF8); db(imm); }");
1193  		puts("void xbegin(uint32_t rel) { db(0xC7); db(0xF8); dd(rel); }");
1194  
1195  		puts("void vsha512msg1(const Ymm& y, const Xmm& x) { if (!(y.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y, 0, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCC); }"); // the vsha512* functions check their operand kinds before calling opVex
1196  		puts("void vsha512msg2(const Ymm& y1, const Ymm& y2) { if (!(y1.isYMM() && y2.isYMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, 0, y2, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCD); }");
1197  		puts("void vsha512rnds2(const Ymm& y1, const Ymm& y2, const Xmm& x) { if (!(y1.isYMM() && y2.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, &y2, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCB); }");
1198  	}
1199  	{ // FPU instructions taking an Address: print functions that forward all five table values to opFpuMem
1200  		const struct Tbl {
1201  			uint8_t m16; // m16/m32/m64 are printed as opFpuMem's 2nd-4th arguments (NOTE(review): presumably one opcode per memory operand size, 0 where unsupported - confirm in opFpuMem)
1202  			uint8_t m32;
1203  			uint8_t m64;
1204  			uint8_t ext; // printed as opFpuMem's 5th argument
1205  			const char *name; // name of the generated function
1206  			uint8_t m64ext; // printed as opFpuMem's last argument; rows that omit it are zero-initialized, only fild and fistp set it
1207  		} tbl[] = {
1208  			{ 0x00, 0xD8, 0xDC, 0, "fadd" },
1209  			{ 0xDE, 0xDA, 0x00, 0, "fiadd" },
1210  			{ 0x00, 0xD8, 0xDC, 2, "fcom" },
1211  			{ 0x00, 0xD8, 0xDC, 3, "fcomp" },
1212  			{ 0x00, 0xD8, 0xDC, 6, "fdiv" },
1213  			{ 0xDE, 0xDA, 0x00, 6, "fidiv" },
1214  			{ 0x00, 0xD8, 0xDC, 7, "fdivr" },
1215  			{ 0xDE, 0xDA, 0x00, 7, "fidivr" },
1216  			{ 0xDE, 0xDA, 0x00, 2, "ficom" },
1217  			{ 0xDE, 0xDA, 0x00, 3, "ficomp" },
1218  			{ 0xDF, 0xDB, 0xDF, 0, "fild", 5 },
1219  			{ 0xDF, 0xDB, 0x00, 2, "fist" },
1220  			{ 0xDF, 0xDB, 0xDF, 3, "fistp", 7 },
1221  			{ 0xDF, 0xDB, 0xDD, 1, "fisttp" },
1222  			{ 0x00, 0xD9, 0xDD, 0, "fld" },
1223  			{ 0x00, 0xD8, 0xDC, 1, "fmul" },
1224  			{ 0xDE, 0xDA, 0x00, 1, "fimul" },
1225  			{ 0x00, 0xD9, 0xDD, 2, "fst" },
1226  			{ 0x00, 0xD9, 0xDD, 3, "fstp" },
1227  			{ 0x00, 0xD8, 0xDC, 4, "fsub" },
1228  			{ 0xDE, 0xDA, 0x00, 4, "fisub" },
1229  			{ 0x00, 0xD8, 0xDC, 5, "fsubr" },
1230  			{ 0xDE, 0xDA, 0x00, 5, "fisubr" },
1231  		};
1232  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1233  			const Tbl *p = &tbl[i];
1234  			printf("void %s(const Address& addr) { opFpuMem(addr, 0x%02X, 0x%02X, 0x%02X, %d, %d); }\n", p->name, p->m16, p->m32, p->m64, p->ext, p->m64ext);
1235  		}
1236  	}
1237  	{ // FPU instructions on two stack registers: print the (Fpu, Fpu) overload and a one-register overload in which st0 is the implied other operand
1238  		const struct Tbl {
1239  			uint32_t code1; // printed as opFpuFpu's third argument; 0 also selects the (reg1, st0) operand order for the one-register overload
1240  			uint32_t code2; // printed as opFpuFpu's fourth argument
1241  			const char *name; // name of the generated function
1242  		} tbl[] = {
1243  			{ 0xD8C0, 0xDCC0, "fadd" },
1244  			{ 0x0000, 0xDEC0, "faddp" },
1245  
1246  			{ 0xDAC0, 0x00C0, "fcmovb" },
1247  			{ 0xDAC8, 0x00C8, "fcmove" },
1248  			{ 0xDAD0, 0x00D0, "fcmovbe" },
1249  			{ 0xDAD8, 0x00D8, "fcmovu" },
1250  			{ 0xDBC0, 0x00C0, "fcmovnb" },
1251  			{ 0xDBC8, 0x00C8, "fcmovne" },
1252  			{ 0xDBD0, 0x00D0, "fcmovnbe" },
1253  			{ 0xDBD8, 0x00D8, "fcmovnu" },
1254  
1255  			{ 0xDBF0, 0x00F0, "fcomi" },
1256  			{ 0xDFF0, 0x00F0, "fcomip" },
1257  			{ 0xDBE8, 0x00E8, "fucomi" },
1258  			{ 0xDFE8, 0x00E8, "fucomip" },
1259  
1260  			{ 0xD8F0, 0xDCF8, "fdiv" },
1261  			{ 0x0000, 0xDEF8, "fdivp" },
1262  			{ 0xD8F8, 0xDCF0, "fdivr" },
1263  			{ 0x0000, 0xDEF0, "fdivrp" },
1264  			{ 0xD8C8, 0xDCC8, "fmul" },
1265  			{ 0x0000, 0xDEC8, "fmulp" },
1266  			{ 0xD8E0, 0xDCE8, "fsub" },
1267  			{ 0x0000, 0xDEE8, "fsubp" },
1268  			{ 0xD8E8, 0xDCE0, "fsubr" },
1269  			{ 0x0000, 0xDEE0, "fsubrp" },
1270  		};
1271  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1272  			const Tbl *p = &tbl[i];
1273  			printf("void %s(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2);
1274  			// omit st0 version(like nasm)
1275  			if (p->code1) {
1276  				printf("void %s(const Fpu& reg1) { opFpuFpu(st0, reg1, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2);
1277  			} else {
1278  				printf("void %s(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2);
1279  			}
1280  		}
1281  	}
1282  	{ // FPU instructions on a single stack register: print (Fpu) functions that forward both table bytes to opFpu
1283  		const struct Tbl {
1284  			uint8_t code1; // printed as opFpu's second argument
1285  			uint8_t code2; // printed as opFpu's third argument
1286  			const char *name; // name of the generated function
1287  		} tbl[] = {
1288  			{ 0xD8, 0xD0, "fcom" },
1289  			{ 0xD8, 0xD8, "fcomp" },
1290  			{ 0xDD, 0xC0, "ffree" },
1291  			{ 0xD9, 0xC0, "fld" },
1292  			{ 0xDD, 0xD0, "fst" },
1293  			{ 0xDD, 0xD8, "fstp" },
1294  			{ 0xDD, 0xE0, "fucom" },
1295  			{ 0xDD, 0xE8, "fucomp" },
1296  			{ 0xD9, 0xC8, "fxch" },
1297  		};
1298  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1299  			const Tbl *p = &tbl[i];
1300  			printf("void %s(const Fpu& reg) { opFpu(reg, 0x%02X, 0x%02X); }\n", p->name, p->code1, p->code2);
1301  		}
1302  	}
1303  	// AVX
1304  	{ // pd, ps, sd, ss: print v<name>pd / v<name>ps for every entry, and v<name>sd / v<name>ss unless only_pd_ps is set; all share the table opcode and differ in their T_* flags
1305  		const struct Tbl {
1306  			uint8_t code; // opcode printed into every generated variant
1307  			const char *name; // stem of the generated names, between the "v" prefix and the pd/ps/sd/ss suffix
1308  			bool only_pd_ps; // skip the sd/ss variants (set for and/andn/or/xor)
1309  		} tbl[] = {
1310  			{ 0x58, "add", false },
1311  			{ 0x5C, "sub", false },
1312  			{ 0x59, "mul", false },
1313  			{ 0x5E, "div", false },
1314  			{ 0x5F, "max", false },
1315  			{ 0x5D, "min", false },
1316  			{ 0x54, "and", true },
1317  			{ 0x55, "andn", true },
1318  			{ 0x56, "or", true },
1319  			{ 0x57, "xor", true },
1320  		};
1321  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1322  			const Tbl *p = &tbl[i];
1323  			printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code);
1324  			printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code);
1325  			if (p->only_pd_ps) continue;
1326  			printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x%02X); }\n", p->name, p->code);
1327  			printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x%02X); }\n", p->name, p->code);
1328  		}
1329  	}
1330  	putX_X_XM(false); // print the (x, x, x/m[, imm]) / (y, y, y/m[, imm]) family from putX_X_XM with omitOnly=false (NOTE(review): presumably the full set rather than only the operand-omitting forms - confirm in putX_X_XM)
1331  
1332  	// (x, x/m[, imm]) or (y, y/m[, imm])
1333  	{ // print the SSE form (via opSSE) and/or the "v"-prefixed AVX form (via opAVX_X_XM_IMM) of each entry, as selected by mode
1334  		const struct Tbl {
1335  			uint8_t code; // opcode printed into both generated forms
1336  			const char *name; // SSE function name; the AVX form is printed with a "v" prefix
1337  			uint64_t type; // T_* flags, rendered by type2String into both forms
1338  			bool hasIMM; // generated functions take a trailing uint8_t imm and forward it
1339  			int mode; // 1 : SSE, 2 : AVX, 3 : SSE + AVX
1340  		} tbl[] = {
1341  			{ 0x15, "blendvpd", T_0F38 | T_66, false, 1 },
1342  			{ 0x14, "blendvps", T_0F38 | T_66, false, 1 },
1343  			{ 0x10, "pblendvb", T_0F38 | T_66, false, 1 },
1344  			{ 0xDF, "aeskeygenassist", T_0F3A | T_66, true, 3 },
1345  			{ 0xDB, "aesimc", T_0F38 | T_66 | T_W0, false, 3 },
1346  			{ 0x09, "roundpd", T_0F3A | T_66 | T_YMM, true, 3 },
1347  			{ 0x08, "roundps", T_0F3A | T_66 | T_YMM, true, 3 },
1348  			{ 0x05, "permilpd", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, 2 },
1349  			{ 0x04, "permilps", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 },
1350  			{ 0x61, "pcmpestri", T_0F3A | T_66, true, 3 },
1351  			{ 0x60, "pcmpestrm", T_0F3A | T_66, true, 3 },
1352  			{ 0x63, "pcmpistri", T_0F3A | T_66, true, 3 },
1353  			{ 0x62, "pcmpistrm", T_0F3A | T_66, true, 3 },
1354  			{ 0x0E, "testps", T_0F38 | T_66 | T_YMM, false, 2 },
1355  			{ 0x0F, "testpd", T_0F38 | T_66 | T_YMM, false, 2 },
1356  			{ 0x2F, "comisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 },
1357  			{ 0x2F, "comiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 },
1358  			{ 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 },
1359  			{ 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 },
1360  			{ 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_SAE_Z, false, 2 },
1361  			{ 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 },
1362  			{ 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 },
1363  			{ 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_DUP, false, 3 },
1364  			{ 0x6F, "movdqa", T_0F | T_66 | T_YMM, false, 2 },
1365  			{ 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false, 2 },
1366  			{ 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 },
1367  			{ 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 },
1368  			{ 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 },
1369  			{ 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 },
1370  
1371  			{ 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 },
1372  			{ 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 },
1373  			{ 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, 2 },
1374  			{ 0x41, "phminposuw", T_0F38 | T_66, false, 3 },
1375  
1376  			{ 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1377  			{ 0x21, "pmovsxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1378  			{ 0x22, "pmovsxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 },
1379  			{ 0x23, "pmovsxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1380  			{ 0x24, "pmovsxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1381  			{ 0x25, "pmovsxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 },
1382  
1383  			{ 0x30, "pmovzxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1384  			{ 0x31, "pmovzxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1385  			{ 0x32, "pmovzxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 },
1386  			{ 0x33, "pmovzxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1387  			{ 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1388  			{ 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 },
1389  
1390  			{ 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 },
1391  			{ 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX, true, 2 },
1392  			{ 0x70, "pshuflw", T_0F | T_F2 | T_YMM | T_EVEX, true, 2 },
1393  
1394  			{ 0x17, "ptest", T_0F38 | T_66 | T_YMM, false, 3 },
1395  			{ 0x53, "rcpps", T_0F | T_YMM, false, 2 },
1396  			{ 0x52, "rsqrtps", T_0F | T_YMM, false, 2 },
1397  
1398  			{ 0x51, "sqrtpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_ER_Z | T_B64, false, 2 },
1399  			{ 0x51, "sqrtps", T_0F | T_YMM | T_EVEX | T_EW0 | T_ER_Z | T_B32, false, 2 },
1400  
1401  			{ 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 },
1402  			{ 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 },
1403  		};
1404  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1405  			const Tbl *p = &tbl[i];
1406  			std::string s = type2String(p->type);
1407  			if (p->mode & 1) { // SSE form
1408  				const char *immS1 = p->hasIMM ? ", uint8_t imm" : "";
1409  				const char *immS2 = p->hasIMM ? ", imm" : ", NONE"; // without an immediate the generated opSSE call passes NONE in that position
1410  				printf("void %s(const Xmm& xmm, const Operand& op%s) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, immS1, s.c_str(), p->code, immS2);
1411  			}
1412  			if (p->mode & 2) { // AVX form; without an immediate the trailing argument is simply omitted
1413  				printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n"
1414  					, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
1415  			}
1416  		}
1417  	}
1418  	// sha
1419  	{ // print (Xmm, Operand) functions that call opSSE_APX with two opcodes per instruction, then the hand-written sha1rnds4 which also takes an immediate
1420  		const struct Tbl {
1421  			uint8_t code; // first opcode printed into opSSE_APX, next to T_0F38
1422  			uint8_t code2; // second opcode printed into opSSE_APX, next to T_MUST_EVEX
1423  			const char *name; // name of the generated function
1424  		} tbl[] = {
1425  			{ 0xC8, 0xD8, "sha1nexte" },
1426  			{ 0xC9, 0xD9, "sha1msg1" },
1427  			{ 0xCA, 0xDA, "sha1msg2" },
1428  			{ 0xCB, 0xDB, "sha256rnds2" },
1429  			{ 0xCC, 0xDC, "sha256msg1" },
1430  			{ 0xCD, 0xDD, "sha256msg2" },
1431  		};
1432  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1433  			const Tbl *p = &tbl[i];
1434  			printf("void %s(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0x%02X, T_MUST_EVEX, 0x%02X); }\n", p->name, p->code, p->code2);
1435  		}
1436  		puts("void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }");
1437  	}
1438  	// (m, x), (m, y)
1439  	{
1440  		const struct Tbl {
1441  			uint8_t code;
1442  			const char *name;
1443  			uint64_t type;
1444  		} tbl[] = {
1445  			{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
1446  			{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
1447  			{ 0x7F, "movdqa", T_0F | T_66 | T_YMM  },
1448  			{ 0x7F, "movdqu", T_0F | T_F3 | T_YMM  },
1449  			{ 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
1450  			{ 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
1451  		};
1452  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1453  			const Tbl *p = &tbl[i];
1454  			std::string s = type2String(p->type);
1455  			printf("void v%s(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, %s, 0x%02X); }\n"
1456  				, p->name, s.c_str(), p->code);
1457  		}
1458  	}
1459  	// (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m)
1460  	{
1461  		const struct Tbl {
1462  			uint8_t code;
1463  			const char *name;
1464  			uint64_t type;
1465  			int mode; // 1 : sse, 2 : avx, 3 : sse + avx
1466  		} tbl[] = {
1467  			{ 0xD0, "addsubpd", T_0F | T_66 | T_YMM, 3 },
1468  			{ 0xD0, "addsubps", T_0F | T_F2 | T_YMM, 3 },
1469  			{ 0x7C, "haddpd", T_0F | T_66 | T_YMM, 3 },
1470  			{ 0x7C, "haddps", T_0F | T_F2 | T_YMM, 3 },
1471  			{ 0x7D, "hsubpd", T_0F | T_66 | T_YMM, 3 },
1472  			{ 0x7D, "hsubps", T_0F | T_F2 | T_YMM, 3 },
1473  
1474  			{ 0xDC, "aesenc", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1475  			{ 0xDD, "aesenclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1476  			{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1477  			{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1478  		};
1479  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1480  			const Tbl *p = &tbl[i];
1481  			std::string s = type2String(p->type);
1482  			if (p->mode & 1) {
1483  				printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, s.c_str(), p->code);
1484  			}
1485  			if (p->mode & 2) {
1486  				printf("void v%s(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, %s, 0x%02X); }\n"
1487  					, p->name, s.c_str(), p->code);
1488  			}
1489  		}
1490  	}
1491  	// vmaskmov
1492  	{
1493  		const char suf[][8] = { "ps", "pd" };
1494  		for (int i = 0; i < 2; i++) {
1495  			printf("void vmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2C + i);
1496  			printf("void vmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2E + i);
1497  		}
1498  	}
1499  	// vpmaskmov
1500  	{
1501  		const char suf[][8] = { "d", "q" };
1502  		for (int i = 0; i < 2; i++) {
1503  			printf("void vpmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8C);
1504  			printf("void vpmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8E);
1505  		}
1506  	}
1507  	// vpermd, vpermps
1508  	{
1509  		const struct Tbl {
1510  			uint8_t code;
1511  			const char *name;
1512  			uint64_t type;
1513  		} tbl[] = {
1514  			{ 0x36, "vpermd", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 },
1515  			{ 0x36, "vpermq", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64 },
1516  			{ 0x16, "vpermps", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 },
1517  			{ 0x16, "vpermpd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_YMM | T_B64 },
1518  		};
1519  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1520  			const Tbl& p = tbl[i];
1521  			std::string s = type2String(p.type);
1522  			printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
1523  		}
1524  	}
1525  	// vpermq, vpermpd
1526  	{
1527  		const struct Tbl {
1528  			uint8_t code;
1529  			const char *name;
1530  			uint64_t type;
1531  		} tbl[] = {
1532  			{ 0x00, "vpermq", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 },
1533  			{ 0x01, "vpermpd", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 },
1534  		};
1535  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1536  			const Tbl& p = tbl[i];
1537  			std::string s = type2String(p.type);
1538  			printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, s.c_str(), p.code);
1539  		}
1540  	}
1541  	// vcmpeqps
1542  	{
1543  		const char pred[32][16] = {
1544  			"eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
1545  			"eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
1546  			"true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
1547  			"eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
1548  		};
1549  		const char suf[][4] = { "pd", "ps", "sd", "ss" };
1550  		for (int i = 0; i < 4; i++) {
1551  			const char *s = suf[i];
1552  			for (int j = 0; j < 32; j++) {
1553  				if (j < 8) {
1554  					printf("void cmp%s%s(const Xmm& x, const Operand& op) { cmp%s(x, op, %d); }\n", pred[j], s, s, j);
1555  				}
1556  				printf("void vcmp%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmp%s(x1, x2, op, %d); }\n", pred[j], s, s, j);
1557  			}
1558  		}
1559  	}
1560  	// vmov(h|l)(pd|ps)
1561  	{
1562  		const struct Tbl {
1563  			bool isH;
1564  			bool isPd;
1565  			uint8_t code;
1566  		} tbl[] = {
1567  			{ true, true, 0x16 },
1568  			{ true, false, 0x16 },
1569  			{ false, true, 0x12 },
1570  			{ false, false, 0x12 },
1571  		};
1572  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1573  			const Tbl& p = tbl[i];
1574  			char c = p.isH ? 'h' : 'l';
1575  			const char *suf = p.isPd ? "pd" : "ps";
1576  			std::string s = type2String(p.isPd ? (T_0F | T_66 | T_EVEX | T_EW1 | T_N8) : (T_0F | T_EVEX | T_EW0 | T_N8));
1577  			printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n"
1578  				, c, suf, s.c_str(), p.code);
1579  			printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n"
1580  				, c, suf, s.c_str(), p.code + 1);
1581  		}
1582  	}
1583  	// FMA
1584  	{
1585  		const struct Tbl {
1586  			uint8_t code;
1587  			const char *name;
1588  			bool supportYMM;
1589  		} tbl[] = {
1590  			{ 0x08, "vfmadd", true },
1591  			{ 0x09, "vfmadd", false },
1592  			{ 0x06, "vfmaddsub", true },
1593  			{ 0x07, "vfmsubadd", true },
1594  			{ 0x0A, "vfmsub", true },
1595  			{ 0x0B, "vfmsub", false },
1596  			{ 0x0C, "vfnmadd", true },
1597  			{ 0x0D, "vfnmadd", false },
1598  			{ 0x0E, "vfnmsub", true },
1599  			{ 0x0F, "vfnmsub", false },
1600  		};
1601  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1602  			for (int j = 0; j < 2; j++) {
1603  				const char sufTbl[][2][8] = {
1604  					{ "pd", "ps" },
1605  					{ "sd", "ss" },
1606  				};
1607  				for (int k = 0; k < 3; k++) {
1608  					const struct Ord {
1609  						const char *str;
1610  						uint8_t code;
1611  					} ord[] = {
1612  						{ "132", 0x90 },
1613  						{ "213", 0xA0 },
1614  						{ "231", 0xB0 },
1615  					};
1616  					int t = T_0F38 | T_66 | T_EVEX;
1617  					t |= (j == 0) ? (T_W1 | T_EW1) : (T_W0 | T_EW0);
1618  					if (tbl[i].supportYMM) t |= T_YMM;
1619  					const std::string suf = sufTbl[tbl[i].supportYMM ? 0 : 1][j];
1620  					if (suf == "pd") {
1621  						t |= T_B64;
1622  					} else if (suf == "ps") {
1623  						t |= T_B32;
1624  					} else if (suf == "sd") {
1625  						t |= T_ER_X | T_N8;
1626  					} else { // ss
1627  						t |= T_ER_X | T_N4;
1628  					}
1629  					std::string s = type2String(t);
1630  					printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
1631  						, tbl[i].name, ord[k].str, suf.c_str(), s.c_str(), tbl[i].code + ord[k].code);
1632  				}
1633  			}
1634  		}
1635  	}
1636  	// FMA others
1637  	{
1638  		printf("void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }\n");
1639  		printf("void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }\n");
1640  		printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n");
1641  		const struct Tbl {
1642  			const char *name;
1643  			uint8_t code;
1644  			uint64_t type;
1645  			bool ew1;
1646  		} tbl[] = {
1647  			{ "vbroadcastss", 0x18, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 },
1648  			{ "vpbroadcastb", 0x78, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N1 },
1649  			{ "vpbroadcastw", 0x79, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N2 },
1650  			{ "vpbroadcastd", 0x58, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 },
1651  			{ "vpbroadcastq", 0x59, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8 },
1652  		};
1653  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1654  			const Tbl& p = tbl[i];
1655  			std::string s = type2String(p.type);
1656  			printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
1657  		}
1658  
1659  		puts("void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }");
1660  		puts("void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }");
1661  		puts("void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }");
1662  		puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }");
1663  		puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }");
1664  		puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }");
1665  		puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }");
1666  
1667  		puts("void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }");
1668  		puts("void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }");
1669  		puts("void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }");
1670  		puts("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }");
1671  
1672  		puts("void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }");
1673  		puts("void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }");
1674  		puts("void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }");
1675  		puts("void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }");
1676  
1677  		puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }");
1678  		puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }");
1679  		puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }");
1680  		puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }");
1681  
1682  		puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }");
1683  
1684  	}
1685  	// (x, x, imm), (x, imm)
1686  	{
1687  		const struct Tbl {
1688  			const char *name;
1689  			uint8_t code;
1690  			int idx;
1691  			uint64_t type;
1692  		} tbl[] = {
1693  			{ "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1694  			{ "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1695  			{ "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1696  			{ "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
1697  			{ "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
1698  			{ "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1699  			{ "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
1700  			{ "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1701  			{ "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
1702  			{ "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
1703  		};
1704  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1705  			const Tbl& p = tbl[i];
1706  			std::string s = type2String(p.type);
1707  			printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code);
1708  		}
1709  	}
1710  	// 4-op
1711  	{
1712  		const struct Tbl {
1713  			const char *name;
1714  			uint8_t code;
1715  		} tbl[] = {
1716  			{ "vblendvpd", 0x4B },
1717  			{ "vblendvps", 0x4A },
1718  			{ "vpblendvb", 0x4C },
1719  		};
1720  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1721  			const Tbl& p = tbl[i];
1722  			printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x%02X, x4.getIdx() << 4); }\n", p.name, p.code);
1723  		}
1724  	}
1725  	// mov
1726  	{
1727  		printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n");
1728  		printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n");
1729  
1730  		printf("void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n");
1731  		printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n");
1732  		printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n");
1733  
1734  		printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n");
1735  		printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n");
1736  
1737  		printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n");
1738  		printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n");
1739  
1740  		puts("void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }");
1741  		puts("void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }");
1742  		puts("void vmovntps(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }");
1743  		puts("void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }");
1744  
1745  		// vmovsd, vmovss
1746  		for (int i = 0; i < 2; i++) {
1747  			char c1 = i == 0 ? 'd' : 's';
1748  			uint64_t type = T_0F | T_EVEX;
1749  			type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4);
1750  			std::string s = type2String(type);
1751  			printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str());
1752  			printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str());
1753  			printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str());
1754  		}
1755  	}
1756  	// cvt
1757  	{
1758  		puts("void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D); }");
1759  		puts("void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }");
1760  		puts("void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }");
1761  		puts("void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C); }");
1762  
1763  		puts("void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }");
1764  		puts("void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }");
1765  
1766  
1767  		puts("void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }");
1768  		puts("void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }");
1769  
1770  		puts("void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }");
1771  		puts("void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
1772  
1773  		puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
1774  
1775  		puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }");
1776  		puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
1777  
1778  	}
1779  	{
1780  		const struct Tbl {
1781  			const char *name;
1782  			uint64_t type;
1783  			uint8_t code;
1784  		} tbl[] = {
1785  			{ "vbcstnebf162ps", T_F3 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
1786  			{ "vbcstnesh2ps", T_66 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
1787  			{ "vcvtneebf162ps", T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0 },
1788  			{ "vcvtneeph2ps", T_66 | T_0F38 | T_W0 | T_YMM, 0xB0 },
1789  			{ "vcvtneobf162ps", T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0 },
1790  			{ "vcvtneoph2ps", T_0F38 | T_W0 | T_YMM, 0xB0 }
1791  		};
1792  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1793  			const Tbl& p = tbl[i];
1794  			printf("void %s(const Xmm& x, const Address& addr) { opVex(x, 0, addr, %s, 0x%02X); }\n", p.name, type2String(p.type).c_str(), p.code);
1795  		}
1796  		printf("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, %s|orEvexIf(encoding), 0x72); }\n", type2String(T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32).c_str());
1797  	}
1798  	// haswell gpr(reg, reg, r/m)
1799  	{
1800  		const struct Tbl {
1801  			const char *name;
1802  			uint64_t type;
1803  			uint8_t code;
1804  		} tbl[] = {
1805  			{ "andn", T_0F38 | T_APX | T_NF, 0xF2 },
1806  			{ "mulx", T_F2 | T_0F38 | T_APX , 0xF6 },
1807  			{ "pdep", T_F2 | T_0F38 | T_APX, 0xF5 },
1808  			{ "pext", T_F3 | T_0F38 | T_APX, 0xF5 },
1809  		};
1810  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1811  			const Tbl& p = tbl[i];
1812  			printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, %s, 0x%x); }\n", p.name, type2String(p.type).c_str(), p.code);
1813  		}
1814  	}
1815  	// gpr(reg, r/m, reg)
1816  	{
1817  		const struct Tbl {
1818  			const char *name;
1819  			uint64_t type;
1820  			uint8_t code;
1821  		} tbl[] = {
1822  			{ "bextr", T_0F38 | T_APX | T_NF, 0xF7 },
1823  			{ "bzhi", T_0F38 | T_APX | T_NF, 0xF5 },
1824  			{ "sarx", T_0F38 | T_F3 | T_APX, 0xF7 },
1825  			{ "shlx", T_0F38 | T_66 | T_APX, 0xF7 },
1826  			{ "shrx", T_0F38 | T_F2 | T_APX, 0xF7 },
1827  		};
1828  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1829  			const Tbl& p = tbl[i];
1830  			printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, %s, 0x%x); }\n", p.name, type2String(p.type).c_str(), p.code);
1831  		}
1832  		puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX, 0xF0, imm); }");
1833  	}
1834  	// gpr(reg, r/m)
1835  	{
1836  		const struct Tbl {
1837  			const char *name;
1838  			uint64_t type;
1839  			uint8_t code;
1840  			uint8_t idx;
1841  		} tbl[] = {
1842  			{ "blsi", T_0F38 | T_APX|T_NF, 0xF3, 3 },
1843  			{ "blsmsk", T_0F38 | T_APX|T_NF, 0xF3, 2 },
1844  			{ "blsr", T_0F38 | T_APX|T_NF, 0xF3, 1 },
1845  		};
1846  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1847  			const Tbl& p = tbl[i];
1848  			printf("void %s(const Reg32e& r, const Operand& op) { opRRO(Reg32e(%d, r.getBit()), r, op, %s, 0x%x); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
1849  		}
1850  	}
1851  	// gather
1852  	{
1853  		const int y_vx_y = 0;
1854  		const int y_vy_y = 1;
1855  		const int x_vy_x = 2;
1856  		const struct Tbl {
1857  			const char *name;
1858  			uint8_t code;
1859  			int w;
1860  			int mode;
1861  		} tbl[] = {
1862  			{ "vgatherdpd", 0x92, 1, y_vx_y },
1863  			{ "vgatherqpd", 0x93, 1, y_vy_y },
1864  			{ "vgatherdps", 0x92, 0, y_vy_y },
1865  			{ "vgatherqps", 0x93, 0, x_vy_x },
1866  			{ "vpgatherdd",  0x90, 0, y_vy_y },
1867  			{ "vpgatherqd",  0x91, 0, x_vy_x },
1868  			{ "vpgatherdq",  0x90, 1, y_vx_y },
1869  			{ "vpgatherqq",  0x91, 1, y_vy_y },
1870  		};
1871  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1872  			const Tbl& p = tbl[i];
1873  			printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode);
1874  		}
1875  	}
1876  	// vnni
1877  	{
1878  		const struct Tbl {
1879  			uint8_t code;
1880  			const char *name;
1881  			uint64_t type;
1882  		} tbl[] = {
1883  			{ 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1884  			{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1885  			{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1886  			{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1887  			{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
1888  			{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
1889  		};
1890  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1891  			const Tbl *p = &tbl[i];
1892  			std::string s = type2String(p->type);
1893  			printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, s.c_str(), p->code);
1894  		}
1895  	}
1896  	// avx-vnni-int8
1897  	// avx-vnni-int16
1898  	{
1899  		const struct Tbl {
1900  			uint8_t code;
1901  			const char *name;
1902  			uint64_t type;
1903  		} tbl[] = {
1904  			{ 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
1905  			{ 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
1906  			{ 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM },
1907  			{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
1908  			{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
1909  			{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
1910  
1911  			{ 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
1912  			{ 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
1913  			{ 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
1914  			{ 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
1915  			{ 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
1916  			{ 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
1917  		};
1918  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1919  			const Tbl *p = &tbl[i];
1920  			std::string s = type2String(p->type);
1921  			printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
1922  		}
1923  	}
1924  }
1925  
1926  void put32()
1927  {
1928  	put_jREGz("cx", true);
1929  	put_jREGz("ecx", false);
1930  
1931  	const GenericTbl tbl[] = {
1932  		{ "aaa", 0x37 },
1933  		{ "aad", 0xD5, 0x0A },
1934  		{ "aam", 0xD4, 0x0A },
1935  		{ "aas", 0x3F },
1936  		{ "daa", 0x27 },
1937  		{ "das", 0x2F },
1938  		{ "into", 0xCE },
1939  		{ "popad", 0x61 },
1940  		{ "popfd", 0x9D },
1941  		{ "pusha", 0x60 },
1942  		{ "pushad", 0x60 },
1943  		{ "pushfd", 0x9C },
1944  		{ "popa", 0x61 },
1945  	};
1946  	putGeneric(tbl, NUM_OF_ARRAY(tbl));
1947  	putLoadSeg("lds", 0, 0xC5);
1948  	putLoadSeg("les", 0, 0xC4);
1949  }
1950  
1951  void put64()
1952  {
1953  	put_jREGz("ecx", true);
1954  	put_jREGz("rcx", false);
1955  
1956  	{
1957  		const GenericTbl tbl[] = {
1958  			{ "cdqe", 0x48, 0x98 },
1959  			{ "cqo", 0x48, 0x99 },
1960  			{ "cmpsq", 0x48, 0xA7 },
1961  			{ "popfq", 0x9D },
1962  			{ "pushfq", 0x9C },
1963  			{ "lodsq", 0x48, 0xAD },
1964  			{ "movsq", 0x48, 0xA5 },
1965  			{ "scasq", 0x48, 0xAF },
1966  			{ "stosq", 0x48, 0xAB },
1967  			{ "syscall", 0x0F, 0x05 },
1968  			{ "sysret", 0x0F, 0x07 },
1969  			{ "clui", 0xF3, 0x0F, 0x01, 0xEE },
1970  			{ "stui", 0xF3, 0x0F, 0x01, 0xEF },
1971  			{ "testui", 0xF3, 0x0F, 0x01, 0xED },
1972  			{ "uiret", 0xF3, 0x0F, 0x01, 0xEC },
1973  		};
1974  		putGeneric(tbl, NUM_OF_ARRAY(tbl));
1975  	}
1976  
1977  	putMemOp("cmpxchg16b", "T_0F", 1, 0xC7, 64);
1978  	putMemOp("fxrstor64", "T_0F", 1, 0xAE, 64);
1979  	puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
1980  	puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
1981  	puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }");
1982  	puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }");
1983  	puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }");
1984  	puts("void senduipi(const Reg64& r) { opRR(Reg32(6), r.cvt32(), T_F3 | T_0F, 0xC7); }");
1985  
1986  	puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }");
1987  	puts("void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }");
1988  	puts("void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }");
1989  	puts("void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }");
1990  
1991  	puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
1992  	puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
1993  	puts("void jmpabs(uint64_t addr) { db(0xD5); db(0x00); db(0xA1); dq(addr); }");
1994  	puts("void push2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W0, 0xFF); }");
1995  	puts("void push2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W1, 0xFF); }");
1996  	puts("void pop2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W0, 0x8F); }");
1997  	puts("void pop2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W1, 0x8F); }");
1998  	// CMPccXADD
1999  	{
2000  		const struct Tbl {
2001  			const char *name;
2002  			uint8_t code;
2003  		} tbl[] = {
2004  			{ "be", 0xE6 },
2005  			{ "b", 0xE2 },
2006  			{ "le", 0xEE },
2007  			{ "l", 0xEC },
2008  			{ "nbe", 0xE7 },
2009  			{ "nb", 0xE3 },
2010  			{ "nle", 0xEF },
2011  			{ "nl", 0xED },
2012  			{ "no", 0xE1 },
2013  			{ "np", 0xEB },
2014  			{ "ns", 0xE9 },
2015  			{ "nz", 0xE5 },
2016  			{ "o", 0xE0 },
2017  			{ "p", 0xEA },
2018  			{ "s", 0xE8 },
2019  			{ "z", 0xE4 },
2020  		};
2021  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2022  			const Tbl *p = &tbl[i];
2023  			printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0x%02X); }\n", p->name, p->code);
2024  		}
2025  	}
2026  	// aes
2027  	{
2028  		const struct Tbl {
2029  			const char *name;
2030  			uint64_t type1;
2031  			uint64_t type2;
2032  			uint8_t code;
2033  			int idx;
2034  		} tbl[] = {
2035  			{ "aesdec128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDD, 8 },
2036  			{ "aesdec256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDF, 8 },
2037  			{ "aesdecwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 1 },
2038  			{ "aesdecwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 3 },
2039  			{ "aesenc128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDC, 8 },
2040  			{ "aesenc256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDE, 8 },
2041  			{ "aesencwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 0 },
2042  			{ "aesencwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 2 },
2043  		};
2044  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2045  			const Tbl *p = &tbl[i];
2046  			std::string s1 = type2String(p->type1);
2047  			std::string s2 = type2String(p->type2);
2048  			if (p->idx == 8) {
2049  				printf("void %s(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, s1.c_str(), p->code, s2.c_str(), p->code);
2050  			} else {
2051  				printf("void %s(const Address& addr) { opSSE_APX(xmm%d, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), p->code, s2.c_str(), p->code);
2052  			}
2053  		}
2054  	}
2055  	// encodekey
2056  	puts("void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }");
2057  	puts("void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }");
2058  }
2059  
/*
	Print the tile-management mnemonic definitions (ldtilecfg, sttilecfg,
	tileloadd, tileloaddt1, tilerelease, tilestored, tilezero) to stdout,
	one definition per line, in the order listed below.
*/
void putAMX_TILE()
{
	static const char *const defs[] = {
		"void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }",
		"void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }",
		"void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }",
		"void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4B); }",
		"void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }",
		"void tilestored(const Address& addr, const Tmm& tm) { if (opROO(Reg(), addr, tm, T_APX|T_F3|T_0F38|T_W0, 0x4B)) return; opVex(tm, &tmm0, addr, T_F3|T_0F38|T_W0, 0x4B); }",
		"void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }",
	};
	const size_t defCount = sizeof(defs) / sizeof(defs[0]);
	for (size_t k = 0; k < defCount; ++k) {
		puts(defs[k]); // puts appends the trailing newline
	}
}
/*
	Print the tile dot-product mnemonic definitions (tdpbssd, tdpbsud,
	tdpbusd, tdpbuud and tdpfp16ps) to stdout, one per line.
	Every generated body forwards to opVex(x1, &x3, x2, <type>, <opcode>).
*/
void putAMX_INT8()
{
	static const char *const defs[] = {
		"void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }",
		"void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }",
		"void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }",
		"void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }",
		"void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }",
	};
	const char *const *const last = defs + sizeof(defs) / sizeof(defs[0]);
	for (const char *const *cur = defs; cur != last; ++cur) {
		puts(*cur);
	}
}
/*
	Print the tdpbf16ps mnemonic definition to stdout as a single line.
*/
void putAMX_BF16()
{
	const char *const def =
		"void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }";
	puts(def);
}
2082  
2083  void putFixed()
2084  {
2085  	puts("#ifdef XBYAK64");
2086  	put64();
2087  	putAMX_TILE();
2088  	putAMX_INT8();
2089  	putAMX_BF16();
2090  	puts("#else");
2091  	put32();
2092  	puts("#endif");
2093  	puts("#ifndef XBYAK_NO_OP_NAMES");
2094  	const char *tbl[] = {
2095  		"and", "or", "xor",
2096  	};
2097  	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2098  		const char *name = tbl[i];
2099  		printf("void %s(const Operand& op1, const Operand& op2) { %s_(op1, op2); }\n", name, name);
2100  		printf("void %s(const Operand& op, uint32_t imm) { %s_(op, imm); }\n", name, name);
2101  	}
2102  	puts("void not(const Operand& op) { not_(op); }");
2103  	puts("#endif");
2104  }
2105  
2106  void putOmit()
2107  {
2108  	puts("void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); }");
2109  	puts("void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); }");
2110  	puts("void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); }");
2111  	puts("void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); }");
2112  
2113  	puts("void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }");
2114  	puts("void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }");
2115  	{
2116  		const char pred[32][16] = {
2117  			"eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
2118  			"eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
2119  			"true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
2120  			"eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
2121  		};
2122  		const char suf[][4] = { "pd", "ps", "sd", "ss" };
2123  		for (int i = 0; i < 4; i++) {
2124  			const char *s = suf[i];
2125  			for (int j = 0; j < 32; j++) {
2126  				printf("void vcmp%s%s(const Xmm& x, const Operand& op) { vcmp%s%s(x, x, op); }\n", pred[j], s, pred[j], s);
2127  			}
2128  		}
2129  	}
2130  	{
2131  		const char *tbl[] = {
2132  			"pslldq",
2133  			"psrldq",
2134  			"psllw",
2135  			"pslld",
2136  			"psllq",
2137  			"psraw",
2138  			"psrad",
2139  			"psrlw",
2140  			"psrld",
2141  			"psrlq",
2142  		};
2143  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2144  			const char *name = tbl[i];
2145  			printf("void v%s(const Xmm& x, uint8_t imm) { v%s(x, x, imm); }\n", name, name);
2146  		}
2147  	}
2148  	{
2149  		const char *tbl[] = {
2150  			"vblendvpd",
2151  			"vblendvps",
2152  			"vpblendvb",
2153  		};
2154  		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2155  			const char *name = tbl[i];
2156  			printf("void %s(const Xmm& x1, const Operand& op, const Xmm& x4) { %s(x1, x1, op, x4); }\n", name, name);
2157  		}
2158  	}
2159  	putX_X_XM(true);
2160  }
2161  
2162  int main(int argc, char *argv[])
2163  {
2164  	std::string mode = argc == 2 ? argv[1] : "";
2165  	if (mode == "") {
2166  		put();
2167  	} else if (mode == "fixed") {
2168  		putFixed();
2169  	} else {
2170  		putOmit();
2171  	}
2172  }