gen_avx512.cpp
1 #define XBYAK_DONT_READ_LIST 2 #include <stdio.h> 3 #include <string.h> 4 #include "../xbyak/xbyak.h" 5 #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) 6 7 using namespace Xbyak; 8 #ifdef _MSC_VER 9 #pragma warning(disable : 4996) // scanf 10 #define snprintf _snprintf_s 11 #endif 12 13 #include "avx_type.hpp" 14 15 void putOpmask(bool only64bit) 16 { 17 if (only64bit) { 18 puts("void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }"); 19 return; 20 } 21 22 { 23 const struct Tbl { 24 const char *name; 25 uint8_t code; 26 } tbl[] = { 27 { "kadd", 0x4A }, 28 { "kand", 0x41 }, 29 { "kandn", 0x42 }, 30 { "kor", 0x45 }, 31 { "kxnor", 0x46 }, 32 { "kxor", 0x47 }, 33 }; 34 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 35 const Tbl& p = tbl[i]; 36 printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code); 37 printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code); 38 printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); 39 printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); 40 } 41 printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n"); 42 printf("void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); }\n"); 43 printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n"); 44 } 45 { 46 const struct Tbl { 47 const char *name; 48 uint8_t code; 49 } tbl[] = { 50 { "knot", 0x44 }, 51 { "kortest", 0x98 }, 52 { "ktest", 0x99 }, 53 }; 54 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 55 const Tbl& p = tbl[i]; 56 printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code); 57 printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code); 58 printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); 59 printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); 60 } 61 } 62 { 63 const struct Tbl { 64 const char *name; 65 uint8_t code; 66 } tbl[] = { 67 { "kshiftl", 0x32 }, 68 { "kshiftr", 0x30 }, 69 }; 70 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 71 const Tbl& p = tbl[i]; 72 printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); 73 printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); 74 printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); 75 printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); 76 } 77 } 78 for (int i = 0; i < 4; i++) { 79 const char tbl[] = "bwdq"; 80 const int bitTbl[] = { 8, 16, 32, 64 }; 81 int bit = bitTbl[i]; 82 printf("void kmov%c(const Opmask& k, const Operand& op) { opKmov(k, op, false, %d); }\n", tbl[i], bit); 83 printf("void kmov%c(const Address& addr, const Opmask& k) { opKmov(k, addr, true, %d); }\n", tbl[i], bit); 84 if (i < 3) printf("void kmov%c(const Reg32& r, const Opmask& k) { opKmov(k, r, true, %d); }\n", tbl[i], bit); 85 } 86 } 87 88 // vcmppd(k, x, op) 89 void putVcmp() 90 { 91 const struct Tbl { 92 uint8_t code; 93 const char *name; 94 uint64_t type; 95 bool hasIMM; 96 } tbl[] = { 97 { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true }, 98 { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true }, 99 { 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true }, 100 { 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true }, 101 { 0xC2, "vcmpph", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B16, true }, 102 { 0xC2, "vcmpsh", T_F3 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true }, 103 104 { 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, 105 { 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, 106 { 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false }, 107 { 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 108 109 { 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, 110 { 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, 111 { 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, 112 { 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 113 114 { 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, 115 { 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, 116 117 { 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, 118 { 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, 119 { 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, 120 { 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, 121 { 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, 122 { 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, 123 124 { 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, 125 { 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, 126 { 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, 127 { 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 128 129 { 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, 130 { 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, 131 { 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, 132 { 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 133 }; 134 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 135 const Tbl *p = &tbl[i]; 136 std::string s = type2String(p->type); 137 printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n" 138 , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); 139 } 140 puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }"); 141 puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }"); 142 } 143 144 void putVcmpAlias() 145 { 146 const char pred[32][16] = { 147 "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", 148 "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", 149 "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", 150 "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" 151 }; 152 const char suf[][4] = { "pd", "ps", "sd", "ss" }; 153 for (int i = 0; i < 4; i++) { 154 const char *s = suf[i]; 155 for (int j = 0; j < 32; j++) { 156 printf("void vcmp%s%s(const Opmask& k, const Xmm& x, const Operand& op) { vcmp%s(k, x, op, %d); }\n", pred[j], s, s, j); 157 } 158 } 159 } 160 161 // XM_X 162 void putX_XM() 163 { 164 const struct Tbl { 165 uint8_t code; 166 const char *name; 167 uint64_t type; 168 } tbl[] = { 169 { 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, 170 { 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, 171 { 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, 172 { 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, 173 { 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, 174 { 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, 175 { 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, 176 // putCvt 177 { 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, 178 { 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z }, 179 { 0xE6, "vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, 180 { 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, 181 { 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, 182 { 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, 183 { 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z }, 184 { 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z }, 185 186 { 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, 187 { 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, 188 189 { 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, 190 { 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, 191 { 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, 192 { 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, 193 { 0x42, "vgetexpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, 194 195 { 0x7D, "vcvtph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, 196 { 0x7D, "vcvtph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, 197 { 0x7C, "vcvttph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, 198 { 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, 199 { 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, 200 { 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, 201 }; 202 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 203 const Tbl *p = &tbl[i]; 204 std::string s = type2String(p->type); 205 printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); 206 } 207 puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }"); 208 209 puts("void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }"); 210 puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }"); 211 puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }"); 212 puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }"); 213 214 puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }"); 215 puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }"); 216 } 217 218 void putM_X() 219 { 220 const struct Tbl { 221 uint8_t code; 222 const char *name; 223 uint64_t type; 224 } tbl[] = { 225 { 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, 226 { 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, 227 { 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, 228 { 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, 229 { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, 230 { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, 231 { 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K }, 232 { 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 }, 233 }; 234 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 235 const Tbl *p = &tbl[i]; 236 std::string s = type2String(p->type); 237 printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); 238 } 239 } 240 241 void putXM_X() 242 { 243 const struct Tbl { 244 uint8_t code; 245 const char *name; 246 uint64_t type; 247 } tbl[] = { 248 { 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, 249 { 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, 250 251 { 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, 252 { 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, 253 254 { 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 }, 255 { 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 }, 256 }; 257 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 258 const Tbl *p = &tbl[i]; 259 std::string s = type2String(p->type); 260 printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); 261 } 262 } 263 264 void putX_X_XM_IMM() 265 { 266 const struct Tbl { 267 uint8_t code; 268 const char *name; 269 uint64_t type; 270 bool hasIMM; 271 } tbl[] = { 272 { 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true }, 273 { 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true }, 274 { 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, 275 { 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, 276 { 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, 277 { 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, 278 { 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 279 { 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 280 { 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 281 { 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 282 { 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false }, 283 { 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 284 { 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, 285 { 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, 286 { 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, 287 { 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, 288 { 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 289 290 { 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, 291 { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 292 293 { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, 294 295 { 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, 296 { 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, 297 298 { 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 299 { 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 300 { 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, 301 { 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, 302 { 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 303 { 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 304 305 { 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, 306 { 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, 307 { 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 308 { 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 309 { 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 310 { 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 311 312 { 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, 313 { 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, 314 { 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 315 { 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 316 { 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 317 { 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 318 319 { 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true }, 320 { 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true }, 321 322 { 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false }, 323 { 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false }, 324 { 0x43, "vgetexpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false }, 325 { 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, 326 { 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, 327 { 0x27, "vgetmantsh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true }, 328 329 { 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true }, 330 { 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true }, 331 { 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true }, 332 { 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true }, 333 334 { 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false }, 335 { 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false }, 336 337 { 0x4D, "vrcpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false }, 338 339 { 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false }, 340 { 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false }, 341 342 { 0x4F, "vrsqrtsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false }, 343 { 0x51, "vsqrtsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false }, 344 345 { 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, true }, 346 { 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, true }, 347 { 0x0A, "vrndscalesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, true }, 348 349 { 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false }, 350 { 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false }, 351 { 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false }, 352 { 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false }, 353 354 { 0x2C, "vscalefph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Z, false }, 355 { 0x2D, "vscalefsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false }, 356 357 { 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true }, 358 { 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 359 360 { 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 361 { 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 362 363 { 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 364 { 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 365 366 { 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false }, 367 { 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false }, 368 369 { 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false }, 370 { 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false }, 371 372 { 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, 373 { 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, 374 { 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, 375 { 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, 376 377 { 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, 378 { 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, 379 { 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true }, 380 381 { 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, 382 { 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true }, 383 { 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true }, 384 385 { 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, 386 { 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, 387 { 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, 388 389 { 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, 390 { 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true }, 391 { 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true }, 392 393 { 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, 394 { 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, 395 { 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, 396 397 { 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, 398 { 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, 399 400 { 0x5A, "vcvtsd2sh", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false }, 401 { 0x5A, "vcvtsh2sd", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false }, 402 { 0x13, "vcvtsh2ss", T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false }, 403 { 0x1D, "vcvtss2sh", T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false }, 404 }; 405 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 406 const Tbl *p = &tbl[i]; 407 std::string s = type2String(p->type); 408 printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" 409 , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); 410 } 411 } 412 413 void putShift() 414 { 415 const struct Tbl { 416 const char *name; 417 uint8_t code; 418 int idx; 419 uint64_t type; 420 } tbl[] = { 421 { "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 }, 422 { "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 }, 423 { "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 }, 424 { "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 }, 425 { "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 }, 426 }; 427 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 428 const Tbl& p = tbl[i]; 429 std::string s = type2String(p.type); 430 printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code); 431 } 432 } 433 434 void putExtractInsert() 435 { 436 { 437 const struct Tbl { 438 const char *name; 439 uint8_t code; 440 uint64_t type; 441 bool isZMM; 442 } tbl[] = { 443 { "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, 444 { "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, 445 { "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, 446 { "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, 447 448 { "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, 449 { "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, 450 { "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, 451 { "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, 452 }; 453 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 454 const Tbl& p = tbl[i]; 455 std::string s = type2String(p.type); 456 const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM"; 457 printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, s.c_str(), p.code); 458 } 459 } 460 { 461 const struct Tbl { 462 const char *name; 463 uint8_t code; 464 uint64_t type; 465 bool isZMM; 466 } tbl[] = { 467 { "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, 468 { "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, 469 { "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, 470 { "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, 471 472 { "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, 473 { "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, 474 { "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, 475 { "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, 476 }; 477 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 478 const Tbl& p = tbl[i]; 479 std::string s = type2String(p.type); 480 const char *x = p.isZMM ? "Zmm" : "Ymm"; 481 const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))"; 482 printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {" 483 "if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) " 484 "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, s.c_str(), p.code); 485 } 486 } 487 } 488 489 void putBroadcast(bool only64bit) 490 { 491 { 492 const struct Tbl { 493 uint8_t code; 494 const char *name; 495 uint64_t type; 496 int reg; 497 } tbl[] = { 498 { 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 }, 499 { 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 }, 500 { 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 }, 501 { 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64}, 502 }; 503 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 504 const Tbl& p = tbl[i]; 505 std::string s = type2String(p.type); 506 if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) { 507 printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, s.c_str(), p.code); 508 } 509 } 510 } 511 if (only64bit) return; 512 puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }"); 513 puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }"); 514 puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }"); 515 puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }"); 516 puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }"); 517 518 puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }"); 519 puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }"); 520 puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }"); 521 puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }"); 522 puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }"); 523 } 524 525 void putCvt() 526 { 527 const struct Tbl { 528 uint8_t code; 529 const char *name; 530 uint64_t type; 531 int ptn; 532 } tbl[] = { 533 { 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 }, 534 { 0x79, "vcvtss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X, 0 }, 535 { 0x78, "vcvttsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X, 0 }, 536 { 0x78, "vcvttss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X, 0 }, 537 { 0x2D, "vcvtsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 }, 538 { 0x79, "vcvtsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 }, 539 { 0x2C, "vcvttsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 }, 540 { 0x78, "vcvttsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 }, 541 542 { 0x7B, "vcvtps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 }, 543 { 0x79, "vcvtps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 }, 544 { 0x7A, "vcvttps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 }, 545 { 0x78, "vcvttps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 }, 546 { 0x7A, "vcvtudq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 1 }, 547 { 0x5B, "vcvtph2dq", T_66 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 }, 548 { 0x13, "vcvtph2psx", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 }, 549 { 0x79, "vcvtph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 }, 550 { 0x5B, "vcvttph2dq", T_F3 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 }, 551 { 0x78, "vcvttph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 }, 552 553 { 0x79, "vcvtpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 }, 554 { 0x5B, "vcvtqq2ps", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 }, 555 { 0x78, "vcvttpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 2 }, 556 { 0x7A, "vcvtuqq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 }, 557 558 { 0x5A, "vcvtph2pd", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, 559 { 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, 560 { 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, 561 { 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, 562 { 0x7A, "vcvttph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, 563 564 { 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, 565 { 0x1D, "vcvtps2phx", T_66 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, 566 { 0x7A, "vcvtudq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, 567 568 { 0x5A, "vcvtpd2ph", T_66 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 }, 569 { 0x5B, "vcvtqq2ph", T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 }, 570 { 0x7A, "vcvtuqq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 }, 571 572 { 0x2A, "vcvtsi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 }, 573 { 0x7B, "vcvtusi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 }, 574 }; 575 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 576 const Tbl& p = tbl[i]; 577 std::string s = type2String(p.type); 578 switch (p.ptn) { 579 case 0: 580 printf("void %s(const Reg32e& r, const Operand& op) { uint64_t type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code); 581 break; 582 case 1: 583 printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); 584 break; 585 case 2: 586 printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); 587 break; 588 case 3: 589 printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); 590 break; 591 case 4: 592 printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); 593 break; 594 case 5: 595 printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); 596 break; 597 case 6: 598 printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code); 599 break; 600 } 601 } 602 puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }"); 603 puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }"); 604 } 605 606 enum { // same as xbyak.h 607 xx_yy_zz = 0, 608 xx_yx_zy = 1, 609 xx_xy_yz = 2, 610 }; 611 void putGather() 612 { 613 const struct Tbl { 614 const char *name; 615 uint64_t type; 616 uint8_t code; 617 int mode; 618 } tbl[] = { 619 { "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz }, 620 { "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy }, 621 { "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz }, 622 { "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz }, 623 { "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz }, 624 { "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy }, 625 { "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz }, 626 { "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz }, 627 }; 628 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 629 const Tbl& p = tbl[i]; 630 std::string s = type2String(p.type | T_VSIB); 631 printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode); 632 } 633 } 634 void putScatter() 635 { 636 const struct Tbl { 637 const char *name; 638 uint64_t type; 639 uint8_t code; 640 int mode; // reverse of gather 641 } tbl[] = { 642 { "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz }, 643 { "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy }, 644 { "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz }, 645 { "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz }, 646 647 { "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz }, 648 { "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy }, 649 { "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz }, 650 { "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz }, 651 }; 652 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 653 const Tbl& p = tbl[i]; 654 std::string s = type2String(p.type | T_VSIB); 655 printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode); 656 } 657 } 658 659 void putShuff() 660 { 661 puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }"); 662 puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }"); 663 puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }"); 664 puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); 665 } 666 667 void putMov() 668 { 669 puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }"); 670 puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }"); 671 puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }"); 672 puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }"); 673 674 puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }"); 675 puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }"); 676 puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }"); 677 puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }"); 678 679 { 680 const struct Tbl { 681 uint8_t code; 682 const char *name; 683 uint64_t type; 684 int mode; 685 } tbl[] = { 686 { 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false }, 687 { 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false }, 688 { 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false }, 689 690 { 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false }, 691 { 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false }, 692 { 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false }, 693 694 { 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 695 { 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 696 { 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 697 698 { 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false }, 699 { 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false }, 700 { 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false }, 701 702 { 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 703 { 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 704 { 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 705 706 { 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 707 { 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 708 { 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true }, 709 }; 710 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 711 const Tbl& p = tbl[i]; 712 std::string s = type2String(p.type); 713 printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, s.c_str(), p.code, p.mode ? "true" : "false"); 714 } 715 } 716 } 717 718 void putX_XM_IMM() 719 { 720 const struct Tbl { 721 uint8_t code; 722 const char *name; 723 uint64_t type; 724 bool hasIMM; 725 } tbl[] = { 726 { 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, 727 { 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, 728 { 0x26, "vgetmantph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true }, 729 { 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 730 { 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 731 732 { 0x4C, "vrcpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, false }, 733 734 { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 735 { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 736 737 { 0x4E, "vrsqrtph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16, false }, 738 { 0x51, "vsqrtph", T_MAP5| T_YMM | T_MUST_EVEX | T_EW0 | T_ER_Z | T_B16, false }, 739 740 { 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, 741 { 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, 742 { 0x08, "vrndscaleph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true }, 743 744 { 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 745 { 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 746 747 { 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, 748 { 0x44, "vplzcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, 749 750 { 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, 751 { 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, 752 { 0x56, "vreduceph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true }, 753 754 { 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false }, 755 { 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, 756 { 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, 757 { 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, 758 759 { 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false }, 760 { 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false }, 761 }; 762 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 763 const Tbl *p = &tbl[i]; 764 std::string s = type2String(p->type); 765 printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n" 766 , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); 767 } 768 } 769 770 void putMisc() 771 { 772 puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }"); 773 puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }"); 774 { 775 const struct Tbl { 776 const char *name; 777 int zm; 778 uint64_t type; 779 uint8_t code; 780 bool isZmm; 781 } tbl[] = { 782 { "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true }, 783 { "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true }, 784 { "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false }, 785 { "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true }, 786 787 { "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true }, 788 { "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true }, 789 { "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false }, 790 { "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true }, 791 792 { "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true }, 793 { "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true }, 794 { "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false }, 795 { "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true }, 796 797 { "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true }, 798 { "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true }, 799 { "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false }, 800 { "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true }, 801 }; 802 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 803 const Tbl& p = tbl[i]; 804 std::string s = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB); 805 printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n" 806 , p.name, p.zm, s.c_str(), p.code, p.isZmm ? "ZMM" : "YMM"); 807 } 808 } 809 810 puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); 811 puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); 812 puts("void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }"); 813 puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); 814 puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); 815 puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }"); 816 817 puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }"); 818 819 puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }"); 820 puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }"); 821 } 822 823 void putV4FMA() 824 { 825 puts("void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }"); 826 puts("void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }"); 827 puts("void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }"); 828 puts("void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }"); 829 puts("void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }"); 830 puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }"); 831 } 832 833 void putFP16_1() 834 { 835 const struct Tbl { 836 uint8_t code; 837 const char *name; 838 } tbl[] = { 839 { 0x58, "add" }, 840 { 0x5C, "sub" }, 841 { 0x59, "mul" }, 842 { 0x5E, "div" }, 843 { 0x5F, "max" }, 844 { 0x5D, "min" }, 845 }; 846 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 847 const Tbl *p = &tbl[i]; 848 printf("void v%sph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x%02X); }\n", p->name, p->code); 849 printf("void v%ssh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x%02X); }\n", p->name, p->code); 850 } 851 } 852 853 void putFP16_FMA() 854 { 855 const struct Tbl { 856 uint8_t code; 857 const char *name; 858 bool isPH; 859 } tbl[] = { 860 { 0x06, "vfmaddsub", true }, 861 { 0x07, "vfmsubadd", true }, 862 { 0x08, "vfmadd", true }, 863 { 0x0C, "vfnmadd", true }, 864 { 0x0A, "vfmsub", true }, 865 { 0x0E, "vfnmsub", true }, 866 { 0x09, "vfmadd", false }, 867 { 0x0D, "vfnmadd", false }, 868 { 0x0B, "vfmsub", false }, 869 { 0x0F, "vfnmsub", false }, 870 }; 871 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 872 for (int k = 0; k < 3; k++) { 873 const struct Ord { 874 const char *str; 875 uint8_t code; 876 } ord[] = { 877 { "132", 0x90 }, 878 { "213", 0xA0 }, 879 { "231", 0xB0 }, 880 }; 881 uint64_t type = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX; 882 const char *suf = 0; 883 if (tbl[i].isPH) { 884 type |= T_ER_Z | T_YMM | T_B16; 885 suf = "ph"; 886 } else { 887 type |= T_ER_X | T_N2; 888 suf = "sh"; 889 } 890 std::string s = type2String(type); 891 printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" 892 , tbl[i].name, ord[k].str, suf, s.c_str(), tbl[i].code | ord[k].code); 893 } 894 } 895 } 896 void putFP16_FMA2() 897 { 898 const struct Tbl { 899 uint8_t code; 900 const char *name; 901 bool isPH; 902 } tbl[] = { 903 { 0x56, "maddc", true }, 904 { 0xD6, "mulc", true }, 905 }; 906 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 907 for (int j = 0; j < 2; j++) { 908 uint64_t type = T_MAP6 | T_EW0 | T_MUST_EVEX; 909 if (j == 0) { 910 type |= T_F2; 911 } else { 912 type |= T_F3; 913 } 914 const char *suf = 0; 915 if (tbl[i].isPH) { 916 type |= T_ER_Z | T_YMM | T_B32; 917 suf = "ph"; 918 } else { 919 type |= T_ER_X | T_N2; 920 suf = "sh"; 921 } 922 std::string s = type2String(type); 923 printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" 924 , j == 0 ? "c" : "", tbl[i].name, suf, s.c_str(), tbl[i].code); 925 } 926 } 927 } 928 929 void putFP16_2() 930 { 931 { 932 uint64_t type = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2; 933 std::string s = type2String(type); 934 printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str()); 935 printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str()); 936 } 937 { 938 uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2; 939 std::string s = type2String(type); 940 printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str()); 941 printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str()); 942 } 943 } 944 945 void putFP16() 946 { 947 putFP16_1(); 948 putFP16_FMA(); 949 putFP16_FMA2(); 950 putFP16_2(); 951 } 952 953 int main(int argc, char *[]) 954 { 955 bool only64bit = argc == 2; 956 putOpmask(only64bit); 957 putBroadcast(only64bit); 958 if (only64bit) { 959 return 0; 960 } 961 putVcmp(); 962 putVcmpAlias(); 963 putX_XM(); 964 putM_X(); 965 putXM_X(); 966 putX_X_XM_IMM(); 967 putShift(); 968 putExtractInsert(); 969 putCvt(); 970 putGather(); 971 putShuff(); 972 putMov(); 973 putX_XM_IMM(); 974 putMisc(); 975 putScatter(); 976 putV4FMA(); 977 putFP16(); 978 }