/ externals / xbyak / test / apx.cpp
apx.cpp
   1  #include <stdio.h>
   2  #include <string.h>
   3  #include <xbyak/xbyak.h>
   4  #include <xbyak/xbyak_util.h>
   5  #include <cybozu/test.hpp>
   6  
   7  #ifndef XBYAK64
   8  	#error "only 64-bit mode"
   9  #endif
  10  
  11  using namespace Xbyak;
  12  
  13  CYBOZU_TEST_AUTO(reg_rm)
  14  {
      	// Golden-byte test: emits adc/add/and_/cmp/or_/sbb/sub/xor_ in three operand
      	// shapes each (r17 <- [rax], [r18] <- rdx, r30 <- rcx) and compares the
      	// generated code with the expected bytes in tbl.
  15  	struct Code : Xbyak::CodeGenerator {
  16  		Code()
  17  		{
  18  			adc(r17, ptr [rax]);
  19  			adc(ptr [r18], rdx);
  20  			adc(r30, rcx);
  21  			add(r17, ptr [rax]);
  22  			add(ptr [r18], rdx);
  23  			add(r30, rcx);
  24  			and_(r17, ptr [rax]);
  25  			and_(ptr [r18], rdx);
  26  			and_(r30, rcx);
  27  			cmp(r17, ptr [rax]);
  28  			cmp(ptr [r18], rdx);
  29  			cmp(r30, rcx);
  30  			or_(r17, ptr [rax]);
  31  			or_(ptr [r18], rdx);
  32  			or_(r30, rcx);
  33  			sbb(r17, ptr [rax]);
  34  			sbb(ptr [r18], rdx);
  35  			sbb(r30, rcx);
  36  			sub(r17, ptr [rax]);
  37  			sub(ptr [r18], rdx);
  38  			sub(r30, rcx);
  39  			xor_(r17, ptr [rax]);
  40  			xor_(ptr [r18], rdx);
  41  			xor_(r30, rcx);
  42  
      			// r30 used in turn as destination, as base and as scaled index.
  43  			add(r30, ptr [rbx+rcx*4]);
  44  			add(rax, ptr [r30+rcx*4]);
  45  			add(rax, ptr [rbx+r30*4]);
  46  		}
  47  	}  c;
      	// Expected machine code, in the same order as the instructions above;
      	// every entry starts with the byte 0xd5.
  48  	const uint8_t tbl[] = {
  49  		0xd5, 0x48, 0x13, 0x08, 0xd5, 0x18, 0x11, 0x12, 0xd5, 0x19, 0x11, 0xce, 0xd5, 0x48, 0x03, 0x08,
  50  		0xd5, 0x18, 0x01, 0x12, 0xd5, 0x19, 0x01, 0xce, 0xd5, 0x48, 0x23, 0x08, 0xd5, 0x18, 0x21, 0x12,
  51  		0xd5, 0x19, 0x21, 0xce, 0xd5, 0x48, 0x3b, 0x08, 0xd5, 0x18, 0x39, 0x12, 0xd5, 0x19, 0x39, 0xce,
  52  		0xd5, 0x48, 0x0b, 0x08, 0xd5, 0x18, 0x09, 0x12, 0xd5, 0x19, 0x09, 0xce, 0xd5, 0x48, 0x1b, 0x08,
  53  		0xd5, 0x18, 0x19, 0x12, 0xd5, 0x19, 0x19, 0xce, 0xd5, 0x48, 0x2b, 0x08, 0xd5, 0x18, 0x29, 0x12,
  54  		0xd5, 0x19, 0x29, 0xce, 0xd5, 0x48, 0x33, 0x08, 0xd5, 0x18, 0x31, 0x12, 0xd5, 0x19, 0x31, 0xce,
  55  		0xd5, 0x4c, 0x03, 0x34, 0x8b, 0xd5, 0x19, 0x03, 0x04, 0x8e, 0xd5, 0x2a, 0x03, 0x04, 0xb3,
  56  	};
      	// Both the generated size and every generated byte must match tbl.
  57  	const size_t n = sizeof(tbl);
  58  	CYBOZU_TEST_EQUAL(c.getSize(), n);
  59  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
  60  }
  61  
  62  CYBOZU_TEST_AUTO(reg64)
  63  {
      	// Golden-byte test for two-operand adc on 64-bit registers: r30 is first
      	// paired as the first operand with each of rax..r31, then as the second
      	// operand with each of rax..r31 (64 instructions in total).
  64  	struct Code : Xbyak::CodeGenerator {
  65  		Code()
  66  		{
  67  			adc(r30, rax); adc(r30, rcx); adc(r30, rdx); adc(r30, rbx); adc(r30, rsp); adc(r30, rbp); adc(r30, rsi); adc(r30, rdi);
  68  			adc(r30, r8); adc(r30, r9); adc(r30, r10); adc(r30, r11); adc(r30, r12); adc(r30, r13); adc(r30, r14); adc(r30, r15);
  69  			adc(r30, r16); adc(r30, r17); adc(r30, r18); adc(r30, r19); adc(r30, r20); adc(r30, r21); adc(r30, r22); adc(r30, r23);
  70  			adc(r30, r24); adc(r30, r25); adc(r30, r26); adc(r30, r27); adc(r30, r28); adc(r30, r29); adc(r30, r30); adc(r30, r31);
  71  			adc(rax, r30); adc(rcx, r30); adc(rdx, r30); adc(rbx, r30); adc(rsp, r30); adc(rbp, r30); adc(rsi, r30); adc(rdi, r30);
  72  			adc(r8, r30); adc(r9, r30); adc(r10, r30); adc(r11, r30); adc(r12, r30); adc(r13, r30); adc(r14, r30); adc(r15, r30);
  73  			adc(r16, r30); adc(r17, r30); adc(r18, r30); adc(r19, r30); adc(r20, r30); adc(r21, r30); adc(r22, r30); adc(r23, r30);
  74  			adc(r24, r30); adc(r25, r30); adc(r26, r30); adc(r27, r30); adc(r28, r30); adc(r29, r30); adc(r30, r30); adc(r31, r30);
  75  		}
  76  	}  c;
      	// Expected machine code: four bytes per instruction, four instructions per
      	// row, in the same order as the adc calls above.
  77  	const uint8_t tbl[] = {
  78  		0xd5, 0x19, 0x11, 0xc6, 0xd5, 0x19, 0x11, 0xce, 0xd5, 0x19, 0x11, 0xd6, 0xd5, 0x19, 0x11, 0xde,
  79  		0xd5, 0x19, 0x11, 0xe6, 0xd5, 0x19, 0x11, 0xee, 0xd5, 0x19, 0x11, 0xf6, 0xd5, 0x19, 0x11, 0xfe,
  80  		0xd5, 0x1d, 0x11, 0xc6, 0xd5, 0x1d, 0x11, 0xce, 0xd5, 0x1d, 0x11, 0xd6, 0xd5, 0x1d, 0x11, 0xde,
  81  		0xd5, 0x1d, 0x11, 0xe6, 0xd5, 0x1d, 0x11, 0xee, 0xd5, 0x1d, 0x11, 0xf6, 0xd5, 0x1d, 0x11, 0xfe,
  82  		0xd5, 0x59, 0x11, 0xc6, 0xd5, 0x59, 0x11, 0xce, 0xd5, 0x59, 0x11, 0xd6, 0xd5, 0x59, 0x11, 0xde,
  83  		0xd5, 0x59, 0x11, 0xe6, 0xd5, 0x59, 0x11, 0xee, 0xd5, 0x59, 0x11, 0xf6, 0xd5, 0x59, 0x11, 0xfe,
  84  		0xd5, 0x5d, 0x11, 0xc6, 0xd5, 0x5d, 0x11, 0xce, 0xd5, 0x5d, 0x11, 0xd6, 0xd5, 0x5d, 0x11, 0xde,
  85  		0xd5, 0x5d, 0x11, 0xe6, 0xd5, 0x5d, 0x11, 0xee, 0xd5, 0x5d, 0x11, 0xf6, 0xd5, 0x5d, 0x11, 0xfe,
  86  		0xd5, 0x4c, 0x11, 0xf0, 0xd5, 0x4c, 0x11, 0xf1, 0xd5, 0x4c, 0x11, 0xf2, 0xd5, 0x4c, 0x11, 0xf3,
  87  		0xd5, 0x4c, 0x11, 0xf4, 0xd5, 0x4c, 0x11, 0xf5, 0xd5, 0x4c, 0x11, 0xf6, 0xd5, 0x4c, 0x11, 0xf7,
  88  		0xd5, 0x4d, 0x11, 0xf0, 0xd5, 0x4d, 0x11, 0xf1, 0xd5, 0x4d, 0x11, 0xf2, 0xd5, 0x4d, 0x11, 0xf3,
  89  		0xd5, 0x4d, 0x11, 0xf4, 0xd5, 0x4d, 0x11, 0xf5, 0xd5, 0x4d, 0x11, 0xf6, 0xd5, 0x4d, 0x11, 0xf7,
  90  		0xd5, 0x5c, 0x11, 0xf0, 0xd5, 0x5c, 0x11, 0xf1, 0xd5, 0x5c, 0x11, 0xf2, 0xd5, 0x5c, 0x11, 0xf3,
  91  		0xd5, 0x5c, 0x11, 0xf4, 0xd5, 0x5c, 0x11, 0xf5, 0xd5, 0x5c, 0x11, 0xf6, 0xd5, 0x5c, 0x11, 0xf7,
  92  		0xd5, 0x5d, 0x11, 0xf0, 0xd5, 0x5d, 0x11, 0xf1, 0xd5, 0x5d, 0x11, 0xf2, 0xd5, 0x5d, 0x11, 0xf3,
  93  		0xd5, 0x5d, 0x11, 0xf4, 0xd5, 0x5d, 0x11, 0xf5, 0xd5, 0x5d, 0x11, 0xf6, 0xd5, 0x5d, 0x11, 0xf7,
  94  	};
      	// Both the generated size and every generated byte must match tbl.
  95  	const size_t n = sizeof(tbl);
  96  	CYBOZU_TEST_EQUAL(c.getSize(), n);
  97  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
  98  }
  99  
 100  CYBOZU_TEST_AUTO(reg32)
 101  {
      	// Golden-byte test for two-operand adc on 32-bit registers: r30d as the
      	// first operand against eax..r31d, then r29d as the second operand with
      	// eax..r31d as the first operand.
 102  	struct Code : Xbyak::CodeGenerator {
 103  		Code()
 104  		{
 105  			adc(r30d, eax); adc(r30d, ecx); adc(r30d, edx); adc(r30d, ebx); adc(r30d, esp); adc(r30d, ebp); adc(r30d, esi); adc(r30d, edi);
 106  			adc(r30d, r8d); adc(r30d, r9d); adc(r30d, r10d); adc(r30d, r11d); adc(r30d, r12d); adc(r30d, r13d); adc(r30d, r14d); adc(r30d, r15d);
 107  			adc(r30d, r16d); adc(r30d, r17d); adc(r30d, r18d); adc(r30d, r19d); adc(r30d, r20d); adc(r30d, r21d); adc(r30d, r22d); adc(r30d, r23d);
 108  			adc(r30d, r24d); adc(r30d, r25d); adc(r30d, r26d); adc(r30d, r27d); adc(r30d, r28d); adc(r30d, r29d); adc(r30d, r30d); adc(r30d, r31d);
 109  			adc(eax, r29d); adc(ecx, r29d); adc(edx, r29d); adc(ebx, r29d); adc(esp, r29d); adc(ebp, r29d); adc(esi, r29d); adc(edi, r29d);
 110  			adc(r8d, r29d); adc(r9d, r29d); adc(r10d, r29d); adc(r11d, r29d); adc(r12d, r29d); adc(r13d, r29d); adc(r14d, r29d); adc(r15d, r29d);
 111  			adc(r16d, r29d); adc(r17d, r29d); adc(r18d, r29d); adc(r19d, r29d); adc(r20d, r29d); adc(r21d, r29d); adc(r22d, r29d); adc(r23d, r29d);
 112  			adc(r24d, r29d); adc(r25d, r29d); adc(r26d, r29d); adc(r27d, r29d); adc(r28d, r29d); adc(r29d, r29d); adc(r30d, r29d); adc(r31d, r29d);
 113  		}
 114  	}  c;
      	// Expected machine code: four bytes per instruction, four instructions per
      	// row, in the same order as the adc calls above.
 115  	const uint8_t tbl[] = {
 116  		0xd5, 0x11, 0x11, 0xc6, 0xd5, 0x11, 0x11, 0xce, 0xd5, 0x11, 0x11, 0xd6, 0xd5, 0x11, 0x11, 0xde,
 117  		0xd5, 0x11, 0x11, 0xe6, 0xd5, 0x11, 0x11, 0xee, 0xd5, 0x11, 0x11, 0xf6, 0xd5, 0x11, 0x11, 0xfe,
 118  		0xd5, 0x15, 0x11, 0xc6, 0xd5, 0x15, 0x11, 0xce, 0xd5, 0x15, 0x11, 0xd6, 0xd5, 0x15, 0x11, 0xde,
 119  		0xd5, 0x15, 0x11, 0xe6, 0xd5, 0x15, 0x11, 0xee, 0xd5, 0x15, 0x11, 0xf6, 0xd5, 0x15, 0x11, 0xfe,
 120  		0xd5, 0x51, 0x11, 0xc6, 0xd5, 0x51, 0x11, 0xce, 0xd5, 0x51, 0x11, 0xd6, 0xd5, 0x51, 0x11, 0xde,
 121  		0xd5, 0x51, 0x11, 0xe6, 0xd5, 0x51, 0x11, 0xee, 0xd5, 0x51, 0x11, 0xf6, 0xd5, 0x51, 0x11, 0xfe,
 122  		0xd5, 0x55, 0x11, 0xc6, 0xd5, 0x55, 0x11, 0xce, 0xd5, 0x55, 0x11, 0xd6, 0xd5, 0x55, 0x11, 0xde,
 123  		0xd5, 0x55, 0x11, 0xe6, 0xd5, 0x55, 0x11, 0xee, 0xd5, 0x55, 0x11, 0xf6, 0xd5, 0x55, 0x11, 0xfe,
 124  		0xd5, 0x44, 0x11, 0xe8, 0xd5, 0x44, 0x11, 0xe9, 0xd5, 0x44, 0x11, 0xea, 0xd5, 0x44, 0x11, 0xeb,
 125  		0xd5, 0x44, 0x11, 0xec, 0xd5, 0x44, 0x11, 0xed, 0xd5, 0x44, 0x11, 0xee, 0xd5, 0x44, 0x11, 0xef,
 126  		0xd5, 0x45, 0x11, 0xe8, 0xd5, 0x45, 0x11, 0xe9, 0xd5, 0x45, 0x11, 0xea, 0xd5, 0x45, 0x11, 0xeb,
 127  		0xd5, 0x45, 0x11, 0xec, 0xd5, 0x45, 0x11, 0xed, 0xd5, 0x45, 0x11, 0xee, 0xd5, 0x45, 0x11, 0xef,
 128  		0xd5, 0x54, 0x11, 0xe8, 0xd5, 0x54, 0x11, 0xe9, 0xd5, 0x54, 0x11, 0xea, 0xd5, 0x54, 0x11, 0xeb,
 129  		0xd5, 0x54, 0x11, 0xec, 0xd5, 0x54, 0x11, 0xed, 0xd5, 0x54, 0x11, 0xee, 0xd5, 0x54, 0x11, 0xef,
 130  		0xd5, 0x55, 0x11, 0xe8, 0xd5, 0x55, 0x11, 0xe9, 0xd5, 0x55, 0x11, 0xea, 0xd5, 0x55, 0x11, 0xeb,
 131  		0xd5, 0x55, 0x11, 0xec, 0xd5, 0x55, 0x11, 0xed, 0xd5, 0x55, 0x11, 0xee, 0xd5, 0x55, 0x11, 0xef,
 132  	};
      	// Both the generated size and every generated byte must match tbl.
 133  	const size_t n = sizeof(tbl);
 134  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 135  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 136  }
 137  
 138  CYBOZU_TEST_AUTO(reg16)
 139  {
      	// Golden-byte test for two-operand adc on 16-bit registers: r30w as the
      	// first operand against ax..r31w, then r29w as the second operand with
      	// ax..r31w as the first operand.
 140  	struct Code : Xbyak::CodeGenerator {
 141  		Code()
 142  		{
 143  			adc(r30w, ax); adc(r30w, cx); adc(r30w, dx); adc(r30w, bx); adc(r30w, sp); adc(r30w, bp); adc(r30w, si); adc(r30w, di);
 144  			adc(r30w, r8w); adc(r30w, r9w); adc(r30w, r10w); adc(r30w, r11w); adc(r30w, r12w); adc(r30w, r13w); adc(r30w, r14w); adc(r30w, r15w);
 145  			adc(r30w, r16w); adc(r30w, r17w); adc(r30w, r18w); adc(r30w, r19w); adc(r30w, r20w); adc(r30w, r21w); adc(r30w, r22w); adc(r30w, r23w);
 146  			adc(r30w, r24w); adc(r30w, r25w); adc(r30w, r26w); adc(r30w, r27w); adc(r30w, r28w); adc(r30w, r29w); adc(r30w, r30w); adc(r30w, r31w);
 147  			adc(ax, r29w); adc(cx, r29w); adc(dx, r29w); adc(bx, r29w); adc(sp, r29w); adc(bp, r29w); adc(si, r29w); adc(di, r29w);
 148  			adc(r8w, r29w); adc(r9w, r29w); adc(r10w, r29w); adc(r11w, r29w); adc(r12w, r29w); adc(r13w, r29w); adc(r14w, r29w); adc(r15w, r29w);
 149  			adc(r16w, r29w); adc(r17w, r29w); adc(r18w, r29w); adc(r19w, r29w); adc(r20w, r29w); adc(r21w, r29w); adc(r22w, r29w); adc(r23w, r29w);
 150  			adc(r24w, r29w); adc(r25w, r29w); adc(r26w, r29w); adc(r27w, r29w); adc(r28w, r29w); adc(r29w, r29w); adc(r30w, r29w); adc(r31w, r29w);
 151  		}
 152  	}  c;
      	// Expected machine code: five bytes per instruction (each entry begins with
      	// 0x66, 0xd5), so entries wrap across the 16-byte rows below.
 153  	const uint8_t tbl[] = {
 154  		0x66, 0xd5, 0x11, 0x11, 0xc6, 0x66, 0xd5, 0x11, 0x11, 0xce, 0x66, 0xd5, 0x11, 0x11, 0xd6, 0x66,
 155  		0xd5, 0x11, 0x11, 0xde, 0x66, 0xd5, 0x11, 0x11, 0xe6, 0x66, 0xd5, 0x11, 0x11, 0xee, 0x66, 0xd5,
 156  		0x11, 0x11, 0xf6, 0x66, 0xd5, 0x11, 0x11, 0xfe, 0x66, 0xd5, 0x15, 0x11, 0xc6, 0x66, 0xd5, 0x15,
 157  		0x11, 0xce, 0x66, 0xd5, 0x15, 0x11, 0xd6, 0x66, 0xd5, 0x15, 0x11, 0xde, 0x66, 0xd5, 0x15, 0x11,
 158  		0xe6, 0x66, 0xd5, 0x15, 0x11, 0xee, 0x66, 0xd5, 0x15, 0x11, 0xf6, 0x66, 0xd5, 0x15, 0x11, 0xfe,
 159  		0x66, 0xd5, 0x51, 0x11, 0xc6, 0x66, 0xd5, 0x51, 0x11, 0xce, 0x66, 0xd5, 0x51, 0x11, 0xd6, 0x66,
 160  		0xd5, 0x51, 0x11, 0xde, 0x66, 0xd5, 0x51, 0x11, 0xe6, 0x66, 0xd5, 0x51, 0x11, 0xee, 0x66, 0xd5,
 161  		0x51, 0x11, 0xf6, 0x66, 0xd5, 0x51, 0x11, 0xfe, 0x66, 0xd5, 0x55, 0x11, 0xc6, 0x66, 0xd5, 0x55,
 162  		0x11, 0xce, 0x66, 0xd5, 0x55, 0x11, 0xd6, 0x66, 0xd5, 0x55, 0x11, 0xde, 0x66, 0xd5, 0x55, 0x11,
 163  		0xe6, 0x66, 0xd5, 0x55, 0x11, 0xee, 0x66, 0xd5, 0x55, 0x11, 0xf6, 0x66, 0xd5, 0x55, 0x11, 0xfe,
 164  		0x66, 0xd5, 0x44, 0x11, 0xe8, 0x66, 0xd5, 0x44, 0x11, 0xe9, 0x66, 0xd5, 0x44, 0x11, 0xea, 0x66,
 165  		0xd5, 0x44, 0x11, 0xeb, 0x66, 0xd5, 0x44, 0x11, 0xec, 0x66, 0xd5, 0x44, 0x11, 0xed, 0x66, 0xd5,
 166  		0x44, 0x11, 0xee, 0x66, 0xd5, 0x44, 0x11, 0xef, 0x66, 0xd5, 0x45, 0x11, 0xe8, 0x66, 0xd5, 0x45,
 167  		0x11, 0xe9, 0x66, 0xd5, 0x45, 0x11, 0xea, 0x66, 0xd5, 0x45, 0x11, 0xeb, 0x66, 0xd5, 0x45, 0x11,
 168  		0xec, 0x66, 0xd5, 0x45, 0x11, 0xed, 0x66, 0xd5, 0x45, 0x11, 0xee, 0x66, 0xd5, 0x45, 0x11, 0xef,
 169  		0x66, 0xd5, 0x54, 0x11, 0xe8, 0x66, 0xd5, 0x54, 0x11, 0xe9, 0x66, 0xd5, 0x54, 0x11, 0xea, 0x66,
 170  		0xd5, 0x54, 0x11, 0xeb, 0x66, 0xd5, 0x54, 0x11, 0xec, 0x66, 0xd5, 0x54, 0x11, 0xed, 0x66, 0xd5,
 171  		0x54, 0x11, 0xee, 0x66, 0xd5, 0x54, 0x11, 0xef, 0x66, 0xd5, 0x55, 0x11, 0xe8, 0x66, 0xd5, 0x55,
 172  		0x11, 0xe9, 0x66, 0xd5, 0x55, 0x11, 0xea, 0x66, 0xd5, 0x55, 0x11, 0xeb, 0x66, 0xd5, 0x55, 0x11,
 173  		0xec, 0x66, 0xd5, 0x55, 0x11, 0xed, 0x66, 0xd5, 0x55, 0x11, 0xee, 0x66, 0xd5, 0x55, 0x11, 0xef,
 174  	};
      	// Both the generated size and every generated byte must match tbl.
 175  	const size_t n = sizeof(tbl);
 176  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 177  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 178  }
 179  
 180  CYBOZU_TEST_AUTO(reg8)
 181  {
      	// Golden-byte test for two-operand adc on 8-bit registers: r17b as the
      	// first operand against al..r31b, then r20b as the second operand with
      	// al..r31b as the first operand.
 182  	struct Code : Xbyak::CodeGenerator {
 183  		Code()
 184  		{
 185  			adc(r17b, al); adc(r17b, cl); adc(r17b, dl); adc(r17b, bl); adc(r17b, spl); adc(r17b, bpl); adc(r17b, sil); adc(r17b, dil);
 186  			adc(r17b, r8b); adc(r17b, r9b); adc(r17b, r10b); adc(r17b, r11b); adc(r17b, r12b); adc(r17b, r13b); adc(r17b, r14b); adc(r17b, r15b);
 187  			adc(r17b, r16b); adc(r17b, r17b); adc(r17b, r18b); adc(r17b, r19b); adc(r17b, r20b); adc(r17b, r21b); adc(r17b, r22b); adc(r17b, r23b);
 188  			adc(r17b, r24b); adc(r17b, r25b); adc(r17b, r26b); adc(r17b, r27b); adc(r17b, r28b); adc(r17b, r29b); adc(r17b, r30b); adc(r17b, r31b);
 189  			adc(al, r20b); adc(cl, r20b); adc(dl, r20b); adc(bl, r20b); adc(spl, r20b); adc(bpl, r20b); adc(sil, r20b); adc(dil, r20b);
 190  			adc(r8b, r20b); adc(r9b, r20b); adc(r10b, r20b); adc(r11b, r20b); adc(r12b, r20b); adc(r13b, r20b); adc(r14b, r20b); adc(r15b, r20b);
 191  			adc(r16b, r20b); adc(r17b, r20b); adc(r18b, r20b); adc(r19b, r20b); adc(r20b, r20b); adc(r21b, r20b); adc(r22b, r20b); adc(r23b, r20b);
 192  			adc(r24b, r20b); adc(r25b, r20b); adc(r26b, r20b); adc(r27b, r20b); adc(r28b, r20b); adc(r29b, r20b); adc(r30b, r20b); adc(r31b, r20b);
 193  		}
 194  	}  c;
      	// Expected machine code: four bytes per instruction, four instructions per
      	// row, in the same order as the adc calls above.
 195  	const uint8_t tbl[] = {
 196  		0xd5, 0x10, 0x10, 0xc1, 0xd5, 0x10, 0x10, 0xc9, 0xd5, 0x10, 0x10, 0xd1, 0xd5, 0x10, 0x10, 0xd9,
 197  		0xd5, 0x10, 0x10, 0xe1, 0xd5, 0x10, 0x10, 0xe9, 0xd5, 0x10, 0x10, 0xf1, 0xd5, 0x10, 0x10, 0xf9,
 198  		0xd5, 0x14, 0x10, 0xc1, 0xd5, 0x14, 0x10, 0xc9, 0xd5, 0x14, 0x10, 0xd1, 0xd5, 0x14, 0x10, 0xd9,
 199  		0xd5, 0x14, 0x10, 0xe1, 0xd5, 0x14, 0x10, 0xe9, 0xd5, 0x14, 0x10, 0xf1, 0xd5, 0x14, 0x10, 0xf9,
 200  		0xd5, 0x50, 0x10, 0xc1, 0xd5, 0x50, 0x10, 0xc9, 0xd5, 0x50, 0x10, 0xd1, 0xd5, 0x50, 0x10, 0xd9,
 201  		0xd5, 0x50, 0x10, 0xe1, 0xd5, 0x50, 0x10, 0xe9, 0xd5, 0x50, 0x10, 0xf1, 0xd5, 0x50, 0x10, 0xf9,
 202  		0xd5, 0x54, 0x10, 0xc1, 0xd5, 0x54, 0x10, 0xc9, 0xd5, 0x54, 0x10, 0xd1, 0xd5, 0x54, 0x10, 0xd9,
 203  		0xd5, 0x54, 0x10, 0xe1, 0xd5, 0x54, 0x10, 0xe9, 0xd5, 0x54, 0x10, 0xf1, 0xd5, 0x54, 0x10, 0xf9,
 204  		0xd5, 0x40, 0x10, 0xe0, 0xd5, 0x40, 0x10, 0xe1, 0xd5, 0x40, 0x10, 0xe2, 0xd5, 0x40, 0x10, 0xe3,
 205  		0xd5, 0x40, 0x10, 0xe4, 0xd5, 0x40, 0x10, 0xe5, 0xd5, 0x40, 0x10, 0xe6, 0xd5, 0x40, 0x10, 0xe7,
 206  		0xd5, 0x41, 0x10, 0xe0, 0xd5, 0x41, 0x10, 0xe1, 0xd5, 0x41, 0x10, 0xe2, 0xd5, 0x41, 0x10, 0xe3,
 207  		0xd5, 0x41, 0x10, 0xe4, 0xd5, 0x41, 0x10, 0xe5, 0xd5, 0x41, 0x10, 0xe6, 0xd5, 0x41, 0x10, 0xe7,
 208  		0xd5, 0x50, 0x10, 0xe0, 0xd5, 0x50, 0x10, 0xe1, 0xd5, 0x50, 0x10, 0xe2, 0xd5, 0x50, 0x10, 0xe3,
 209  		0xd5, 0x50, 0x10, 0xe4, 0xd5, 0x50, 0x10, 0xe5, 0xd5, 0x50, 0x10, 0xe6, 0xd5, 0x50, 0x10, 0xe7,
 210  		0xd5, 0x51, 0x10, 0xe0, 0xd5, 0x51, 0x10, 0xe1, 0xd5, 0x51, 0x10, 0xe2, 0xd5, 0x51, 0x10, 0xe3,
 211  		0xd5, 0x51, 0x10, 0xe4, 0xd5, 0x51, 0x10, 0xe5, 0xd5, 0x51, 0x10, 0xe6, 0xd5, 0x51, 0x10, 0xe7,
 212  	};
      	// Both the generated size and every generated byte must match tbl.
 213  	const size_t n = sizeof(tbl);
 214  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 215  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 216  }
 217  
 218  CYBOZU_TEST_AUTO(rm)
 219  {
      	// Golden-byte test for adc between r16 (in its 64/32/16/8-bit forms) and a
      	// memory operand, in both directions, for three address shapes:
      	// base+disp, scaled-index+disp, and base+scaled-index+disp.
 220  	struct Code : Xbyak::CodeGenerator {
 221  		Code()
 222  		{
      			// [base + disp]
 223  			adc(r16, ptr [r17+0x40]);
 224  			adc(ptr [r17+0x40], r16);
 225  			adc(r16d, ptr [r17+0x40]);
 226  			adc(ptr [r17+0x40], r16d);
 227  			adc(r16w, ptr [r17+0x40]);
 228  			adc(ptr [r17+0x40], r16w);
 229  			adc(r16b, ptr [r17+0x40]);
 230  			adc(ptr [r17+0x40], r16b);
      			// [index*4 + disp], no base register
 231  			adc(r16, ptr [r18*4+0x40]);
 232  			adc(ptr [r18*4+0x40], r16);
 233  			adc(r16d, ptr [r18*4+0x40]);
 234  			adc(ptr [r18*4+0x40], r16d);
 235  			adc(r16w, ptr [r18*4+0x40]);
 236  			adc(ptr [r18*4+0x40], r16w);
 237  			adc(r16b, ptr [r18*4+0x40]);
 238  			adc(ptr [r18*4+0x40], r16b);
      			// [base + index*4 + disp]
 239  			adc(r16, ptr [r17+r18*4+0x40]);
 240  			adc(ptr [r17+r18*4+0x40], r16);
 241  			adc(r16d, ptr [r17+r18*4+0x40]);
 242  			adc(ptr [r17+r18*4+0x40], r16d);
 243  			adc(r16w, ptr [r17+r18*4+0x40]);
 244  			adc(ptr [r17+r18*4+0x40], r16w);
 245  			adc(r16b, ptr [r17+r18*4+0x40]);
 246  			adc(ptr [r17+r18*4+0x40], r16b);
 247  		}
 248  	}  c;
      	// Expected machine code, in the same order as the adc calls above; entries
      	// have different lengths, so they wrap across the 16-byte rows.
 249  	const uint8_t tbl[] = {
 250  		0xd5, 0x58, 0x13, 0x41, 0x40, 0xd5, 0x58, 0x11, 0x41, 0x40, 0xd5, 0x50, 0x13, 0x41, 0x40, 0xd5,
 251  		0x50, 0x11, 0x41, 0x40, 0x66, 0xd5, 0x50, 0x13, 0x41, 0x40, 0x66, 0xd5, 0x50, 0x11, 0x41, 0x40,
 252  		0xd5, 0x50, 0x12, 0x41, 0x40, 0xd5, 0x50, 0x10, 0x41, 0x40, 0xd5, 0x68, 0x13, 0x04, 0x95, 0x40,
 253  		0x00, 0x00, 0x00, 0xd5, 0x68, 0x11, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0xd5, 0x60, 0x13, 0x04,
 254  		0x95, 0x40, 0x00, 0x00, 0x00, 0xd5, 0x60, 0x11, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0x66, 0xd5,
 255  		0x60, 0x13, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0x66, 0xd5, 0x60, 0x11, 0x04, 0x95, 0x40, 0x00,
 256  		0x00, 0x00, 0xd5, 0x60, 0x12, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0xd5, 0x60, 0x10, 0x04, 0x95,
 257  		0x40, 0x00, 0x00, 0x00, 0xd5, 0x78, 0x13, 0x44, 0x91, 0x40, 0xd5, 0x78, 0x11, 0x44, 0x91, 0x40,
 258  		0xd5, 0x70, 0x13, 0x44, 0x91, 0x40, 0xd5, 0x70, 0x11, 0x44, 0x91, 0x40, 0x66, 0xd5, 0x70, 0x13,
 259  		0x44, 0x91, 0x40, 0x66, 0xd5, 0x70, 0x11, 0x44, 0x91, 0x40, 0xd5, 0x70, 0x12, 0x44, 0x91, 0x40,
 260  		0xd5, 0x70, 0x10, 0x44, 0x91, 0x40,
 261  	};
      	// Both the generated size and every generated byte must match tbl.
 262  	const size_t n = sizeof(tbl);
 263  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 264  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 265  }
 266  
 267  CYBOZU_TEST_AUTO(r3)
 268  {
      	// Golden-byte test for the three-operand register form of adc using
      	// r20/r21/r23 at 8-, 16-, 32- and 64-bit widths.
 269  	struct Code : Xbyak::CodeGenerator {
 270  		Code()
 271  		{
 272  			adc(r20b, r21b, r23b);
 273  			adc(r20w, r21w, r23w);
 274  			adc(r20d, r21d, r23d);
 275  			adc(r20, r21, r23);
 276  	}
 277  	}  c;
      	// Expected machine code: six bytes per instruction, each starting with 0x62.
 278  	const uint8_t tbl[] = {
 279  		0x62, 0xec, 0x5c, 0x10, 0x10, 0xfd, 0x62, 0xec, 0x5d, 0x10, 0x11, 0xfd, 0x62, 0xec, 0x5c, 0x10,
 280  		0x11, 0xfd, 0x62, 0xec, 0xdc, 0x10, 0x11, 0xfd,
 281  	};
      	// Both the generated size and every generated byte must match tbl.
 282  	const size_t n = sizeof(tbl);
 283  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 284  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 285  }
 286  
 287  CYBOZU_TEST_AUTO(rm3)
 288  {
      	// Golden-byte test for three-operand adc on 64-bit registers where one of
      	// the two trailing operands is a memory reference (as second or as third
      	// operand), including index-only addresses such as [r10*4] and [r10*8].
 289  	struct Code : Xbyak::CodeGenerator {
 290  		Code()
 291  		{
 292  			adc(rax, r18, ptr [rbx+rcx*4+0x123]);
 293  			adc(rax, ptr [rbx+rcx*4+0x123], r20);
 294  			adc(rax, ptr [r30], r29);
 295  			adc(r11, r13, ptr [r10]);
 296  			adc(r11, r13, ptr [r10*4]);
 297  			adc(r11, ptr [r10*8], r9);
 298  		}
 299  	}  c;
      	// Expected machine code, in the same order as the adc calls above; every
      	// entry starts with 0x62.
 300  	const uint8_t tbl[] = {
 301  		0x62, 0xe4, 0xfc, 0x18, 0x13, 0x94, 0x8b, 0x23, 0x01, 0x00, 0x00, 0x62, 0xe4, 0xfc, 0x18, 0x11,
 302  		0xa4, 0x8b, 0x23, 0x01, 0x00, 0x00, 0x62, 0x4c, 0xfc, 0x18, 0x11, 0x2e, 0x62, 0x54, 0xa4, 0x18,
 303  		0x13, 0x2a, 0x62, 0x34, 0xa4, 0x18, 0x13, 0x2c, 0x95, 0x00, 0x00, 0x00, 0x00, 0x62, 0x34, 0xa4,
 304  		0x18, 0x11, 0x0c, 0xd5, 0x00, 0x00, 0x00, 0x00,
 305  	};
      	// Both the generated size and every generated byte must match tbl.
 306  	const size_t n = sizeof(tbl);
 307  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 308  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 309  }
 310  
 311  CYBOZU_TEST_AUTO(rm3_2)
 312  {
      	// Golden-byte test for three-operand adc at 8/16/32/64-bit widths in three
      	// shapes: (reg, reg, reg), (reg, mem, imm) and (reg, reg, imm).
 313  	struct Code : Xbyak::CodeGenerator {
 314  		Code()
 315  		{
      			// reg, reg, reg
 316  			adc(r20b, r21b, r23b);
 317  			adc(r20w, r21w, r23w);
 318  			adc(r20d, r21d, r23d);
 319  			adc(r20, r21, r23);
      			// reg, mem, imm (displacement is the largest positive 32-bit value)
 320  			adc(r20b, ptr [rax+rcx*4+0x7fffffff], 0x12);
 321  			adc(r20w, ptr [rax+rcx*4+0x7fffffff], 0x1234);
 322  			adc(r20d, ptr [rax+rcx*4+0x7fffffff], 0x12345678);
 323  			adc(r20, ptr [rax+rcx*4+0x7fffffff], 0x12345678);
      			// reg, reg, imm
 324  			adc(r20b, al, 0x12);
 325  			adc(r20w, ax, 0x1234);
 326  			adc(r20d, eax, 0x12345678);
 327  			adc(r20, rax, 0x12345678);
 328  		}
 329  	}  c;
      	// Expected machine code, in the same order as the adc calls above; every
      	// entry starts with 0x62.
 330  	const uint8_t tbl[] = {
 331  		0x62, 0xec, 0x5c, 0x10, 0x10, 0xfd, 0x62, 0xec, 0x5d, 0x10, 0x11, 0xfd, 0x62, 0xec, 0x5c, 0x10,
 332  		0x11, 0xfd, 0x62, 0xec, 0xdc, 0x10, 0x11, 0xfd, 0x62, 0xf4, 0x5c, 0x10, 0x80, 0x94, 0x88, 0xff,
 333  		0xff, 0xff, 0x7f, 0x12, 0x62, 0xf4, 0x5d, 0x10, 0x81, 0x94, 0x88, 0xff, 0xff, 0xff, 0x7f, 0x34,
 334  		0x12, 0x62, 0xf4, 0x5c, 0x10, 0x81, 0x94, 0x88, 0xff, 0xff, 0xff, 0x7f, 0x78, 0x56, 0x34, 0x12,
 335  		0x62, 0xf4, 0xdc, 0x10, 0x81, 0x94, 0x88, 0xff, 0xff, 0xff, 0x7f, 0x78, 0x56, 0x34, 0x12, 0x62,
 336  		0xf4, 0x5c, 0x10, 0x80, 0xd0, 0x12, 0x62, 0xf4, 0x5d, 0x10, 0x81, 0xd0, 0x34, 0x12, 0x62, 0xf4,
 337  		0x5c, 0x10, 0x81, 0xd0, 0x78, 0x56, 0x34, 0x12, 0x62, 0xf4, 0xdc, 0x10, 0x81, 0xd0, 0x78, 0x56,
 338  		0x34, 0x12,
 339  	};
      	// Both the generated size and every generated byte must match tbl.
 340  	const size_t n = sizeof(tbl);
 341  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 342  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 343  }
 344  
 345  CYBOZU_TEST_AUTO(adcx_adox)
 346  {
      	// Golden-byte test for adcx and adox: the same seven operand combinations
      	// are emitted for each mnemonic (reg/reg and reg/mem with registers from
      	// r16..r31, plus a three-operand rax, rcx, rdx form).
 347  	struct Code : Xbyak::CodeGenerator {
 348  		Code()
 349  		{
 350  			adcx(rax, r30);
 351  			adcx(ecx, r20d);
 352  			adcx(ecx, ptr [r31+r29*4]);
 353  			adcx(r20d, ptr [rax]);
 354  			adcx(r16, ptr [r31+r29*4]);
 355  			adcx(r17, ptr [rax]);
 356  			adcx(rax, rcx, rdx);
 357  
 358  			adox(rax, r30);
 359  			adox(ecx, r20d);
 360  			adox(ecx, ptr [r31+r29*4]);
 361  			adox(r20d, ptr [rax]);
 362  			adox(r16, ptr [r31+r29*4]);
 363  			adox(r17, ptr [rax]);
 364  			adox(rax, rcx, rdx);
 365  		}
 366  	} c;
      	// Expected machine code: first group is adcx, second group (after the
      	// blank line) is adox, each in the order emitted above.
 367  	const uint8_t tbl[] = {
 368  		0x62, 0xdc, 0xfd, 0x08, 0x66, 0xc6, 0x62, 0xfc, 0x7d, 0x08, 0x66, 0xcc, 0x62, 0x9c, 0x79, 0x08,
 369  		0x66, 0x0c, 0xaf, 0x62, 0xe4, 0x7d, 0x08, 0x66, 0x20, 0x62, 0x8c, 0xf9, 0x08, 0x66, 0x04, 0xaf,
 370  		0x62, 0xe4, 0xfd, 0x08, 0x66, 0x08, 0x62, 0xf4, 0xfd, 0x18, 0x66, 0xca,
 371  
 372  		0x62, 0xdc, 0xfe, 0x08, 0x66, 0xc6, 0x62, 0xfc, 0x7e, 0x08, 0x66, 0xcc, 0x62, 0x9c, 0x7a, 0x08,
 373  		0x66, 0x0c, 0xaf, 0x62, 0xe4, 0x7e, 0x08, 0x66, 0x20, 0x62, 0x8c, 0xfa, 0x08, 0x66, 0x04, 0xaf,
 374  		0x62, 0xe4, 0xfe, 0x08, 0x66, 0x08, 0x62, 0xf4, 0xfe, 0x18, 0x66, 0xca,
 375  	};
      	// Both the generated size and every generated byte must match tbl.
 376  	const size_t n = sizeof(tbl);
 377  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 378  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 379  }
 380  
 381  CYBOZU_TEST_AUTO(r3_2)
 382  {
      	// Golden-byte test for the three-operand forms of add/adc/and_/or_/sbb/sub/
      	// xor_: once with (rax, rcx, rdx) and once with (r30, ptr [r20], r9).
 383  	struct Code : Xbyak::CodeGenerator {
 384  		Code()
 385  		{
 386  			add(rax, rcx, rdx);
 387  			adc(rax, rcx, rdx);
 388  			and_(rax, rcx, rdx);
 389  			or_(rax, rcx, rdx);
 390  			sbb(rax, rcx, rdx);
 391  			sub(rax, rcx, rdx);
 392  			xor_(rax, rcx, rdx);
 393  			add(r30, ptr [r20], r9);
 394  			adc(r30, ptr [r20], r9);
 395  			and_(r30, ptr [r20], r9);
 396  			or_(r30, ptr [r20], r9);
 397  			sbb(r30, ptr [r20], r9);
 398  			sub(r30, ptr [r20], r9);
 399  			xor_(r30, ptr [r20], r9);
 400  		}
 401  	} c;
      	// Expected machine code, in the same order as the instructions above:
      	// six bytes per register form, seven bytes per memory form.
 402  	const uint8_t tbl[] = {
 403  		0x62, 0xf4, 0xfc, 0x18, 0x01, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x11, 0xd1, 0x62, 0xf4, 0xfc, 0x18,
 404  		0x21, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x09, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x19, 0xd1, 0x62, 0xf4,
 405  		0xfc, 0x18, 0x29, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x31, 0xd1, 0x62, 0x7c, 0x8c, 0x10, 0x01, 0x0c,
 406  		0x24, 0x62, 0x7c, 0x8c, 0x10, 0x11, 0x0c, 0x24, 0x62, 0x7c, 0x8c, 0x10, 0x21, 0x0c, 0x24, 0x62,
 407  		0x7c, 0x8c, 0x10, 0x09, 0x0c, 0x24, 0x62, 0x7c, 0x8c, 0x10, 0x19, 0x0c, 0x24, 0x62, 0x7c, 0x8c,
 408  		0x10, 0x29, 0x0c, 0x24, 0x62, 0x7c, 0x8c, 0x10, 0x31, 0x0c, 0x24,
 409  	};
      	// Both the generated size and every generated byte must match tbl.
 410  	const size_t n = sizeof(tbl);
 411  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 412  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 413  }
 414  
 415  CYBOZU_TEST_AUTO(NF)
 416  {
      	// Golden-byte test for the T_nf modifier: each of add/and_/or_/sub/xor_ is
      	// emitted without and then with T_nf on the destination, first with a
      	// register source and then with the immediate 3.
 417  	struct Code : Xbyak::CodeGenerator {
 418  		Code()
 419  		{
 420  			add(rax, rcx, rdx);
 421  			add(rax|T_nf, rcx, rdx);
 422  			and_(rax, rcx, rdx);
 423  			and_(rax|T_nf, rcx, rdx);
 424  			or_(rax, rcx, rdx);
 425  			or_(rax|T_nf, rcx, rdx);
 426  			sub(rax, rcx, rdx);
 427  			sub(rax|T_nf, rcx, rdx);
 428  			xor_(rax, rcx, rdx);
 429  			xor_(rax|T_nf, rcx, rdx);
 430  
 431  			add(rax, rcx, 3);
 432  			add(rax|T_nf, rcx, 3);
 433  			and_(rax, rcx, 3);
 434  			and_(rax|T_nf, rcx, 3);
 435  			or_(rax, rcx, 3);
 436  			or_(rax|T_nf, rcx, 3);
 437  			sub(rax, rcx, 3);
 438  			sub(rax|T_nf, rcx, 3);
 439  			xor_(rax, rcx, 3);
 440  			xor_(rax|T_nf, rcx, 3);
 441  		}
 442  	} c;
      	// Expected machine code, one instruction per row, grouped in
      	// (plain, T_nf) pairs; within a pair only the fourth byte differs
      	// (0x18 without T_nf, 0x1c with it).
 443  	const uint8_t tbl[] = {
 444  		0x62, 0xf4, 0xfc, 0x18, 0x01, 0xd1,
 445  		0x62, 0xf4, 0xfc, 0x1c, 0x01, 0xd1,
 446  
 447  		0x62, 0xf4, 0xfc, 0x18, 0x21, 0xd1,
 448  		0x62, 0xf4, 0xfc, 0x1c, 0x21, 0xd1,
 449  
 450  		0x62, 0xf4, 0xfc, 0x18, 0x09, 0xd1,
 451  		0x62, 0xf4, 0xfc, 0x1c, 0x09, 0xd1,
 452  
 453  		0x62, 0xf4, 0xfc, 0x18, 0x29, 0xd1,
 454  		0x62, 0xf4, 0xfc, 0x1c, 0x29, 0xd1,
 455  
 456  		0x62, 0xf4, 0xfc, 0x18, 0x31, 0xd1,
 457  		0x62, 0xf4, 0xfc, 0x1c, 0x31, 0xd1,
 458  
 459  		0x62, 0xf4, 0xfc, 0x18, 0x83, 0xc1, 0x03,
 460  		0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xc1, 0x03,
 461  
 462  		0x62, 0xf4, 0xfc, 0x18, 0x83, 0xe1, 0x03,
 463  		0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xe1, 0x03,
 464  
 465  		0x62, 0xf4, 0xfc, 0x18, 0x83, 0xc9, 0x03,
 466  		0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xc9, 0x03,
 467  
 468  		0x62, 0xf4, 0xfc, 0x18, 0x83, 0xe9, 0x03,
 469  		0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xe9, 0x03,
 470  
 471  		0x62, 0xf4, 0xfc, 0x18, 0x83, 0xf1, 0x03,
 472  		0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xf1, 0x03,
 473  	};
      	// Both the generated size and every generated byte must match tbl.
 474  	const size_t n = sizeof(tbl);
 475  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 476  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 477  }
 478  
 479  CYBOZU_TEST_AUTO(andn_etc)
 480  {
      	// Golden-byte test for andn (including a T_nf variant), mulx, pdep and pext
      	// with registers from r16..r31, in 64-bit register, 32-bit register and
      	// memory-source forms.
 481  	struct Code : Xbyak::CodeGenerator {
 482  		Code()
 483  		{
 484  			andn(r29, r30, r31);
 485  			andn(r29|T_nf, r30, r31);
 486  			andn(eax, ecx, r17d); // 32bit
 487  			andn(r29, r30, ptr [r31+r20*4]);
 488  
 489  			mulx(eax, ecx, r17d);
 490  			mulx(r29, r30, r31);
 491  			mulx(r29, r30, ptr [r31+r20*4]);
 492  			pdep(eax, ecx, r17d);
 493  			pdep(r29, r30, r31);
 494  			pdep(r29, r30, ptr [r31+r20*4]);
 495  			pext(eax, ecx, r17d);
 496  			pext(r29, r30, r31);
 497  			pext(r29, r30, ptr [r31+r20*4]);
 498  
 499  		}
 500  	} c;
      	// Expected machine code, in the same order as the instructions above.
 501  	const uint8_t tbl[] = {
 502  		// andn
 503  		0x62, 0x4a, 0x8c, 0x00, 0xf2, 0xef,
 504  		0x62, 0x4a, 0x8c, 0x04, 0xf2, 0xef,
 505  		0x62, 0xfa, 0x74, 0x08, 0xf2, 0xc1,
 506  		0x62, 0x4a, 0x88, 0x00, 0xf2, 0x2c, 0xa7,
 507  
 508  		// mulx, pdep, pext
 509  		0x62, 0xfa, 0x77, 0x08, 0xf6, 0xc1, 0x62, 0x4a, 0x8f, 0x00, 0xf6, 0xef, 0x62, 0x4a, 0x8b, 0x00, 0xf6, 0x2c, 0xa7,
 510  		0x62, 0xfa, 0x77, 0x08, 0xf5, 0xc1, 0x62, 0x4a, 0x8f, 0x00, 0xf5, 0xef, 0x62, 0x4a, 0x8b, 0x00, 0xf5, 0x2c, 0xa7,
 511  		0x62, 0xfa, 0x76, 0x08, 0xf5, 0xc1, 0x62, 0x4a, 0x8e, 0x00, 0xf5, 0xef, 0x62, 0x4a, 0x8a, 0x00, 0xf5, 0x2c, 0xa7,
 512  	};
      	// Both the generated size and every generated byte must match tbl.
 513  	const size_t n = sizeof(tbl);
 514  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 515  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 516  }
 517  
 518  CYBOZU_TEST_AUTO(bextr_etc)
 519  {
      	// Golden-byte test for bextr, bzhi, sarx, shlx, shrx, blsi, blsmsk, blsr and
      	// rorx with registers from r16..r31. bextr, bzhi, blsi, blsmsk and blsr are
      	// additionally emitted with T_nf on the destination.
 520  	struct Code : Xbyak::CodeGenerator {
 521  		Code()
 522  		{
 523  			bextr(r29, r30, r31);
 524  			bextr(r29|T_nf, r30, r31);
 525  			bextr(eax, ecx, r17d);
 526  			bextr(r29, ptr [r31+r20*4], r30);
 527  
 528  			bzhi(r29, r30, r31);
 529  			bzhi(r29|T_nf, r30, r31);
 530  			bzhi(eax, ecx, r17d);
 531  			bzhi(r29, ptr [r31+r20*4], r30);
 532  
 533  			sarx(r29, r30, r31);
 534  			sarx(eax, ecx, r17d);
 535  			sarx(r29, ptr [r31+r20*4], r30);
 536  
 537  			shlx(r29, r30, r31);
 538  			shlx(eax, ecx, r17d);
 539  			shlx(r29, ptr [r31+r20*4], r30);
 540  
 541  			shrx(r29, r30, r31);
 542  			shrx(eax, ecx, r17d);
 543  			shrx(r29, ptr [r31+r20*4], r30);
 544  
 545  			blsi(r30, r31);
 546  			blsi(r30|T_nf, r31);
 547  			blsi(ecx, r17d);
 548  			blsi(r30, ptr [r31+r20*4]);
 549  
 550  			blsmsk(r30, r31);
 551  			blsmsk(r30|T_nf, r31);
 552  			blsmsk(ecx, r17d);
 553  			blsmsk(r30, ptr [r31+r20*4]);
 554  
 555  			blsr(r30, r31);
 556  			blsr(r30|T_nf, r31);
 557  			blsr(ecx, r17d);
 558  			blsr(r30, ptr [r31+r20*4]);
 559  
 560  			rorx(r30, r31, 3);
 561  			rorx(ecx, r17d, 5);
 562  			rorx(r30, ptr [r31+r20*4], 4);
 563  		}
 564  	} c;
      	// Expected machine code, one instruction per row, grouped by mnemonic in
      	// the same order as the instructions above.
 565  	const uint8_t tbl[] = {
 566  		// bextr
 567  		0x62, 0x4a, 0x84, 0x00, 0xf7, 0xee,
 568  		0x62, 0x4a, 0x84, 0x04, 0xf7, 0xee,
 569  		0x62, 0xf2, 0x74, 0x00, 0xf7, 0xc1,
 570  		0x62, 0x4a, 0x88, 0x00, 0xf7, 0x2c, 0xa7,
 571  
 572  		// bzhi
 573  		0x62, 0x4a, 0x84, 0x00, 0xf5, 0xee,
 574  		0x62, 0x4a, 0x84, 0x04, 0xf5, 0xee,
 575  		0x62, 0xf2, 0x74, 0x00, 0xf5, 0xc1,
 576  		0x62, 0x4a, 0x88, 0x00, 0xf5, 0x2c, 0xa7,
 577  
 578  		// sarx
 579  		0x62, 0x4a, 0x86, 0x00, 0xf7, 0xee,
 580  		0x62, 0xf2, 0x76, 0x00, 0xf7, 0xc1,
 581  		0x62, 0x4a, 0x8a, 0x00, 0xf7, 0x2c, 0xa7,
 582  
 583  		// shlx
 584  		0x62, 0x4a, 0x85, 0x00, 0xf7, 0xee,
 585  		0x62, 0xf2, 0x75, 0x00, 0xf7, 0xc1,
 586  		0x62, 0x4a, 0x89, 0x00, 0xf7, 0x2c, 0xa7,
 587  
 588  		// shrx
 589  		0x62, 0x4a, 0x87, 0x00, 0xf7, 0xee,
 590  		0x62, 0xf2, 0x77, 0x00, 0xf7, 0xc1,
 591  		0x62, 0x4a, 0x8b, 0x00, 0xf7, 0x2c, 0xa7,
 592  
 593  		// blsi
 594  		0x62, 0xda, 0x8c, 0x00, 0xf3, 0xdf,
 595  		0x62, 0xda, 0x8c, 0x04, 0xf3, 0xdf,
 596  		0x62, 0xfa, 0x74, 0x08, 0xf3, 0xd9,
 597  		0x62, 0xda, 0x88, 0x00, 0xf3, 0x1c, 0xa7,
 598  
 599  		// blsmsk
 600  		0x62, 0xda, 0x8c, 0x00, 0xf3, 0xd7,
 601  		0x62, 0xda, 0x8c, 0x04, 0xf3, 0xd7,
 602  		0x62, 0xfa, 0x74, 0x08, 0xf3, 0xd1,
 603  		0x62, 0xda, 0x88, 0x00, 0xf3, 0x14, 0xa7,
 604  
 605  		// blsr
 606  		0x62, 0xda, 0x8c, 0x00, 0xf3, 0xcf,
 607  		0x62, 0xda, 0x8c, 0x04, 0xf3, 0xcf,
 608  		0x62, 0xfa, 0x74, 0x08, 0xf3, 0xc9,
 609  		0x62, 0xda, 0x88, 0x00, 0xf3, 0x0c, 0xa7,
 610  
 611  		// rorx
 612  		0x62, 0x4b, 0xff, 0x08, 0xf0, 0xf7, 0x03,
 613  		0x62, 0xfb, 0x7f, 0x08, 0xf0, 0xc9, 0x05,
 614  		0x62, 0x4b, 0xfb, 0x08, 0xf0, 0x34, 0xa7, 0x04,
 615  	};
      	// Both the generated size and every generated byte must match tbl.
 616  	const size_t n = sizeof(tbl);
 617  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 618  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 619  }
 620  
 621  CYBOZU_TEST_AUTO(bit)
 622  {
      	// Golden-byte test covering every operand width (8/16/32/64 bit) of adc and
      	// add on r20/r21/r22 in four shapes: (reg, reg, reg), (reg, reg),
      	// (reg, reg, imm) and (reg, imm).
 623  	struct Code : Xbyak::CodeGenerator {
 624  		Code()
 625  		{
 626  			// adc
 627  			adc(r20b, r21b, r22b);
 628  			adc(r20w, r21w, r22w);
 629  			adc(r20d, r21d, r22d);
 630  			adc(r20, r21, r22);
 631  
 632  			adc(r20b, r21b);
 633  			adc(r20w, r21w);
 634  			adc(r20d, r21d);
 635  			adc(r20, r21);
 636  
 637  			adc(r20b, r21b, 0x3);
 638  			adc(r20w, r21w, 0x3);
 639  			adc(r20d, r21d, 0x3);
 640  			adc(r20, r21, 0x3);
 641  
 642  			adc(r20b, 0x3);
 643  			adc(r20w, 0x3);
 644  			adc(r20d, 0x3);
 645  			adc(r20, 0x3);
 646  
 647  			// add
 648  			add(r20b, r21b, r22b);
 649  			add(r20w, r21w, r22w);
 650  			add(r20d, r21d, r22d);
 651  			add(r20, r21, r22);
 652  			add(r20b, r21b);
 653  			add(r20w, r21w);
 654  			add(r20d, r21d);
 655  			add(r20, r21);
 656  			add(r20b, r21b, 0x3);
 657  			add(r20w, r21w, 0x3);
 658  			add(r20d, r21d, 0x3);
 659  			add(r20, r21, 0x3);
 660  			add(r20b, 0x3);
 661  			add(r20w, 0x3);
 662  			add(r20d, 0x3);
 663  			add(r20, 0x3);
 664  		}
 665  	} c;
      	// Expected machine code, in the same order as the instructions above.
      	// In this table the three-operand entries start with 0x62 and the
      	// two-operand entries with 0xd5 (after a 0x66 byte for the 16-bit forms).
 666  	const uint8_t tbl[] = {
 667  		// adc
 668  		0x62, 0xec, 0x5c, 0x10, 0x10, 0xf5, 0x62, 0xec, 0x5d, 0x10, 0x11, 0xf5, 0x62, 0xec, 0x5c, 0x10,
 669  		0x11, 0xf5, 0x62, 0xec, 0xdc, 0x10, 0x11, 0xf5, 0xd5, 0x50, 0x10, 0xec, 0x66, 0xd5, 0x50, 0x11,
 670  		0xec, 0xd5, 0x50, 0x11, 0xec, 0xd5, 0x58, 0x11, 0xec, 0x62, 0xfc, 0x5c, 0x10, 0x80, 0xd5, 0x03,
 671  		0x62, 0xfc, 0x5d, 0x10, 0x83, 0xd5, 0x03, 0x62, 0xfc, 0x5c, 0x10, 0x83, 0xd5, 0x03, 0x62, 0xfc,
 672  		0xdc, 0x10, 0x83, 0xd5, 0x03, 0xd5, 0x10, 0x80, 0xd4, 0x03, 0x66, 0xd5, 0x10, 0x83, 0xd4, 0x03,
 673  		0xd5, 0x10, 0x83, 0xd4, 0x03, 0xd5, 0x18, 0x83, 0xd4, 0x03,
 674  
 675  		// add
 676  		0x62, 0xec, 0x5c, 0x10, 0x00, 0xf5, 0x62, 0xec, 0x5d, 0x10, 0x01, 0xf5, 0x62, 0xec, 0x5c, 0x10,
 677  		0x01, 0xf5, 0x62, 0xec, 0xdc, 0x10, 0x01, 0xf5, 0xd5, 0x50, 0x00, 0xec, 0x66, 0xd5, 0x50, 0x01,
 678  		0xec, 0xd5, 0x50, 0x01, 0xec, 0xd5, 0x58, 0x01, 0xec, 0x62, 0xfc, 0x5c, 0x10, 0x80, 0xc5, 0x03,
 679  		0x62, 0xfc, 0x5d, 0x10, 0x83, 0xc5, 0x03, 0x62, 0xfc, 0x5c, 0x10, 0x83, 0xc5, 0x03, 0x62, 0xfc,
 680  		0xdc, 0x10, 0x83, 0xc5, 0x03, 0xd5, 0x10, 0x80, 0xc4, 0x03, 0x66, 0xd5, 0x10, 0x83, 0xc4, 0x03,
 681  		0xd5, 0x10, 0x83, 0xc4, 0x03, 0xd5, 0x18, 0x83, 0xc4, 0x03,
 682  	};
      	// Both the generated size and every generated byte must match tbl.
 683  	const size_t n = sizeof(tbl);
 684  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 685  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 686  }
 687  
 688  CYBOZU_TEST_AUTO(inc_dec)
 689  {
      	// Golden-byte test for inc and dec on r30/r31: one-operand forms at all four
      	// widths, two-operand forms (register and memory source), and a 16-bit
      	// two-operand form with T_nf on the destination.
 690  	struct Code : Xbyak::CodeGenerator {
 691  		Code()
 692  		{
 693  			inc(r30b);
 694  			inc(r30w);
 695  			inc(r30d);
 696  			inc(r30);
 697  			inc(r30b, r31b);
 698  			inc(r30w, r31w);
 699  			inc(r30w|T_nf, r31w);
 700  			inc(r30d, r31d);
 701  			inc(r30, r31);
 702  			inc(r30, ptr [r31]);
 703  
 704  			dec(r30b);
 705  			dec(r30w);
 706  			dec(r30d);
 707  			dec(r30);
 708  			dec(r30b, r31b);
 709  			dec(r30w, r31w);
 710  			dec(r30w|T_nf, r31w);
 711  			dec(r30d, r31d);
 712  			dec(r30, r31);
 713  			dec(r30, ptr [r31]);
 714  		}
 715  	} c;
      	// Expected machine code, in the same order as the instructions above. The
      	// rows marked T_nf differ from the row before them only in the fourth byte
      	// (0x14 instead of 0x10).
 716  	const uint8_t tbl[] = {
 717  		// inc
 718  		0xd5, 0x11, 0xfe, 0xc6, 0x66, 0xd5, 0x11, 0xff, 0xc6, 0xd5, 0x11, 0xff, 0xc6, 0xd5, 0x19, 0xff,
 719  		0xc6, 0x62, 0xdc, 0x0c, 0x10, 0xfe, 0xc7,
 720  		0x62, 0xdc, 0x0d, 0x10, 0xff, 0xc7,
 721  		0x62, 0xdc, 0x0d, 0x14, 0xff, 0xc7, // T_nf
 722  		0x62, 0xdc, 0x0c, 0x10, 0xff, 0xc7, 0x62, 0xdc, 0x8c, 0x10, 0xff, 0xc7, 0x62,
 723  		0xdc, 0x8c, 0x10, 0xff, 0x07,
 724  		// dec
 725  		0xd5, 0x11, 0xfe, 0xce, 0x66, 0xd5, 0x11, 0xff, 0xce, 0xd5, 0x11, 0xff, 0xce, 0xd5, 0x19, 0xff,
 726  		0xce, 0x62, 0xdc, 0x0c, 0x10, 0xfe, 0xcf,
 727  		0x62, 0xdc, 0x0d, 0x10, 0xff, 0xcf,
 728  		0x62, 0xdc, 0x0d, 0x14, 0xff, 0xcf, // T_nf
 729  		0x62, 0xdc, 0x0c, 0x10, 0xff, 0xcf, 0x62, 0xdc, 0x8c, 0x10, 0xff, 0xcf, 0x62,
 730  		0xdc, 0x8c, 0x10, 0xff, 0x0f,
 731  
 732  	};
      	// Both the generated size and every generated byte must match tbl.
 733  	const size_t n = sizeof(tbl);
 734  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 735  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 736  }
 737  
 738  CYBOZU_TEST_AUTO(div_op1)
 739  {
 740  	struct Code : Xbyak::CodeGenerator {
 741  		Code()
 742  		{
 743  			div(r20b);
 744  			div(r20d);
 745  			div(r20w);
 746  			div(r20);
 747  			div(r20|T_nf);
 748  			div(eax|T_nf);
 749  			div(byte [r20+r30*1]);
 750  			div(word [r20+r30*1]);
 751  			div(dword [r20+r30*1]);
 752  			div(qword [r20+r30*1]);
 753  
 754  			idiv(r20b);
 755  			idiv(r20d);
 756  			idiv(r20w);
 757  			idiv(r20);
 758  			idiv(r20|T_nf);
 759  			idiv(eax|T_nf);
 760  			idiv(byte [r20+r30*1]);
 761  			idiv(word [r20+r30*1]);
 762  			idiv(dword [r20+r30*1]);
 763  			idiv(qword [r20+r30*1]);
 764  
 765  			imul(r20b);
 766  			imul(r20d);
 767  			imul(r20w);
 768  			imul(r20);
 769  			imul(r20|T_nf);
 770  			imul(eax|T_nf);
 771  			imul(byte [r20+r30*1]);
 772  			imul(word [r20+r30*1]);
 773  			imul(dword [r20+r30*1]);
 774  			imul(qword [r20+r30*1]);
 775  
 776  			mul(r20b);
 777  			mul(r20d);
 778  			mul(r20w);
 779  			mul(r20);
 780  			mul(r20|T_nf);
 781  			mul(eax|T_nf);
 782  			mul(byte [r20+r30*1]);
 783  			mul(word [r20+r30*1]);
 784  			mul(dword [r20+r30*1]);
 785  			mul(qword [r20+r30*1]);
 786  
 787  			neg(r20b);
 788  			neg(r20d);
 789  			neg(r20w);
 790  			neg(r20);
 791  			neg(r20|T_nf);
 792  			neg(eax|T_nf);
 793  			neg(byte [r20+r30*1]);
 794  			neg(word [r20+r30*1]);
 795  			neg(dword [r20+r30*1]);
 796  			neg(qword [r20+r30*1]);
 797  
 798  			// not_ does not have NF=1
 799  			not_(r20b);
 800  			not_(r20d);
 801  			not_(r20w);
 802  			not_(r20);
 803  			not_(byte [r20+r30*1]);
 804  			not_(word [r20+r30*1]);
 805  			not_(dword [r20+r30*1]);
 806  			not_(qword [r20+r30*1]);
 807  		}
 808  	} c;
 809  	const uint8_t tbl[] = {
 810  		0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xf4,
 811  		0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xf4,
 812  		0x62, 0xfc, 0x7d, 0x08, 0xf7, 0xf4,
 813  		0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xf4,
 814  		0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xf4, // r20|T_nf
 815  		0x62, 0xf4, 0x7c, 0x0c, 0xf7, 0xf0, // eax|T_nf
 816  		0x62, 0xbc, 0x78, 0x08, 0xf6, 0x34, 0x34,
 817  		0x62, 0xbc, 0x79, 0x08, 0xf7, 0x34, 0x34,
 818  		0x62, 0xbc, 0x78, 0x08, 0xf7, 0x34, 0x34,
 819  		0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x34, 0x34,
 820  
 821  		0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xfc, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xfc, 0x62, 0xfc, 0x7d, 0x08,
 822  		0xf7, 0xfc, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xfc, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xfc, 0x62, 0xf4,
 823  		0x7c, 0x0c, 0xf7, 0xf8, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x3c, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
 824  		0x3c, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x3c, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x3c, 0x34,
 825  
 826  		0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xec, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xec, 0x62, 0xfc, 0x7d, 0x08,
 827  		0xf7, 0xec, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xec, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xec, 0x62, 0xf4,
 828  		0x7c, 0x0c, 0xf7, 0xe8, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x2c, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
 829  		0x2c, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x2c, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x2c, 0x34,
 830  		0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xe4, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xe4, 0x62, 0xfc, 0x7d, 0x08,
 831  		0xf7, 0xe4, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xe4, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xe4, 0x62, 0xf4,
 832  		0x7c, 0x0c, 0xf7, 0xe0, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x24, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
 833  		0x24, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x24, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x24, 0x34,
 834  		0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xdc, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xdc, 0x62, 0xfc, 0x7d, 0x08,
 835  		0xf7, 0xdc, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xdc, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xdc, 0x62, 0xf4,
 836  		0x7c, 0x0c, 0xf7, 0xd8, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x1c, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
 837  		0x1c, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x1c, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x1c, 0x34,
 838  		0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xd4, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xd4, 0x62, 0xfc, 0x7d, 0x08,
 839  		0xf7, 0xd4, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xd4, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x14, 0x34, 0x62,
 840  		0xbc, 0x79, 0x08, 0xf7, 0x14, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x14, 0x34, 0x62, 0xbc, 0xf8,
 841  		0x08, 0xf7, 0x14, 0x34,
 842  	};
 843  	const size_t n = sizeof(tbl);
 844  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 845  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 846  }
 847  
 848  CYBOZU_TEST_AUTO(imul_2op)
 849  {
 850  	struct Code : Xbyak::CodeGenerator {
 851  		Code()
 852  		{
 853  			// imul(r30b, al); // QQQ : not supported?
 854  			imul(r30w, ax);
 855  			imul(r30d, eax);
 856  			imul(r30, rax);
 857  			imul(r30|T_nf, rax);
 858  			imul(rcx|T_nf, rax);
 859  			imul(rcx, ptr [r30]);
 860  
 861  			neg(r30b, al);
 862  			neg(r30w, ax);
 863  			neg(r30d, eax);
 864  			neg(r30, rax);
 865  			neg(r30|T_nf, rax);
 866  			neg(rcx|T_nf, rax);
 867  			neg(rcx, ptr [r30]);
 868  
 869  			not_(r30b, al);
 870  			not_(r30w, ax);
 871  			not_(r30d, eax);
 872  			not_(r30, rax);
 873  			not_(rcx, ptr [r30]);
 874  		}
 875  	} c;
 876  	const uint8_t tbl[] = {
 877  		// imul
 878  		0x62, 0x64, 0x7d, 0x08, 0xaf, 0xf0,
 879  		0x62, 0x64, 0x7c, 0x08, 0xaf, 0xf0,
 880  		0x62, 0x64, 0xfc, 0x08, 0xaf, 0xf0,
 881  		0x62, 0x64, 0xfc, 0x0c, 0xaf, 0xf0,
 882  		0x62, 0xf4, 0xfc, 0x0c, 0xaf, 0xc8,
 883  		0x62, 0xdc, 0xfc, 0x08, 0xaf, 0x0e,
 884  
 885  		// neg
 886  		0x62, 0xf4, 0x0c, 0x10, 0xf6, 0xd8, 0x62, 0xf4, 0x0d, 0x10, 0xf7, 0xd8, 0x62, 0xf4, 0x0c, 0x10,
 887  		0xf7, 0xd8, 0x62, 0xf4, 0x8c, 0x10, 0xf7, 0xd8, 0x62, 0xf4, 0x8c, 0x14, 0xf7, 0xd8, 0x62, 0xf4,
 888  		0xf4, 0x1c, 0xf7, 0xd8, 0x62, 0xdc, 0xf4, 0x18, 0xf7, 0x1e,
 889  
 890  		// not
 891  		0x62, 0xf4, 0x0c, 0x10, 0xf6, 0xd0, 0x62, 0xf4, 0x0d, 0x10, 0xf7, 0xd0, 0x62, 0xf4, 0x0c, 0x10,
 892  		0xf7, 0xd0, 0x62, 0xf4, 0x8c, 0x10, 0xf7, 0xd0, 0x62, 0xdc, 0xf4, 0x18, 0xf7, 0x16,
 893  	};
 894  	const size_t n = sizeof(tbl);
 895  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 896  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 897  }
 898  
 899  CYBOZU_TEST_AUTO(imul_zu)
 900  {
 901  	struct Code : Xbyak::CodeGenerator {
 902  		Code()
 903  		{
 904  			imul(ax|T_zu, cx, 0x1234);
 905  			imul(ax|T_nf, cx, 0x1234);
 906  			imul(ax|T_zu|T_nf, cx, 0x1234);
 907  			imul(r30w, ax, 0x1234);
 908  			imul(r30d, eax, 0x12345678);
 909  			imul(r30, rax, 0x12345678);
 910  			imul(r30|T_zu, rax, 0x12345678);
 911  			imul(r30|T_nf, rax, 0x12345678);
 912  			imul(r30|T_nf|T_zu, rax, 0x12345678);
 913  			imul(rcx, ptr [r30], 0x12345678);
 914  		}
 915  	} c;
 916  	const uint8_t tbl[] = {
 917  		0x62, 0xf4, 0x7d, 0x18, 0x69, 0xc1, 0x34, 0x12, // T_zu
 918  		0x62, 0xf4, 0x7d, 0x0c, 0x69, 0xc1, 0x34, 0x12, // T_nf
 919  		0x62, 0xf4, 0x7d, 0x1c, 0x69, 0xc1, 0x34, 0x12, // T_zu|T_nf
 920  		0x62, 0x64, 0x7d, 0x08, 0x69, 0xf0, 0x34, 0x12, // w
 921  		0x62, 0x64, 0x7c, 0x08, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // d
 922  		0x62, 0x64, 0xfc, 0x08, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12,
 923  		0x62, 0x64, 0xfc, 0x18, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // T_zu
 924  		0x62, 0x64, 0xfc, 0x0c, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // T_nf
 925  		0x62, 0x64, 0xfc, 0x1c, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // T_nf|T_zu
 926  		0x62, 0xdc, 0xfc, 0x08, 0x69, 0x0e, 0x78, 0x56, 0x34, 0x12,
 927  
 928  	};
 929  	const size_t n = sizeof(tbl);
 930  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 931  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 932  }
 933  
 934  CYBOZU_TEST_AUTO(lzcnt)
 935  {
 936  	struct Code : Xbyak::CodeGenerator {
 937  		Code()
 938  		{
 939  			lzcnt(r16w, r17w);
 940  			lzcnt(r16d, r17d);
 941  			lzcnt(r16, r17);
 942  			lzcnt(r16|T_nf, r17);
 943  			lzcnt(rax|T_nf, rcx);
 944  			lzcnt(rax, ptr [r18]);
 945  
 946  			tzcnt(r16w, r17w);
 947  			tzcnt(r16d, r17d);
 948  			tzcnt(r16, r17);
 949  			tzcnt(r16|T_nf, r17);
 950  			tzcnt(rax|T_nf, rcx);
 951  			tzcnt(rax, ptr [r18]);
 952  		}
 953  	} c;
 954  	const uint8_t tbl[] = {
 955  		// lzcnt
 956  		0x62, 0xec, 0x7d, 0x08, 0xf5, 0xc1, 0x62, 0xec, 0x7c, 0x08, 0xf5, 0xc1, 0x62, 0xec, 0xfc, 0x08,
 957  		0xf5, 0xc1, 0x62, 0xec, 0xfc, 0x0c, 0xf5, 0xc1, 0x62, 0xf4, 0xfc, 0x0c, 0xf5, 0xc1, 0x62, 0xfc,
 958  		0xfc, 0x08, 0xf5, 0x02,
 959  		// tzcnt
 960  		0x62, 0xec, 0x7d, 0x08, 0xf4, 0xc1, 0x62, 0xec, 0x7c, 0x08, 0xf4, 0xc1, 0x62, 0xec, 0xfc, 0x08,
 961  		0xf4, 0xc1, 0x62, 0xec, 0xfc, 0x0c, 0xf4, 0xc1, 0x62, 0xf4, 0xfc, 0x0c, 0xf4, 0xc1, 0x62, 0xfc,
 962  		0xfc, 0x08, 0xf4, 0x02,
 963  	};
 964  	const size_t n = sizeof(tbl);
 965  	CYBOZU_TEST_EQUAL(c.getSize(), n);
 966  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
 967  }
 968  
 969  CYBOZU_TEST_AUTO(shld)
 970  {
 971  	struct Code : Xbyak::CodeGenerator {
 972  		Code()
 973  		{
 974  			shld(rax|T_nf, rcx, cl);
 975  			shld(r16, rcx, cl);
 976  			shld(r16, rcx, 0x9);
 977  			shld(r16|T_nf, rcx, 0x9);
 978  			shld(r20, r16, rcx, cl);
 979  			shld(r20|T_nf, r16, rcx, cl);
 980  			shld(r20, r16, rcx, 0x9);
 981  			shld(r20|T_nf, r16, rcx, 0x9);
 982  			shld(r20, ptr [r21], rcx, 0x9);
 983  
 984  			shrd(rax|T_nf, rcx, cl);
 985  			shrd(r16, rcx, cl);
 986  			shrd(r16, rcx, 0x9);
 987  			shrd(r16|T_nf, rcx, 0x9);
 988  			shrd(r20, r16, rcx, cl);
 989  			shrd(r20|T_nf, r16, rcx, cl);
 990  			shrd(r20, r16, rcx, 0x9);
 991  			shrd(r20|T_nf, r16, rcx, 0x9);
 992  			shrd(r20, ptr [r21], rcx, 0x9);
 993  		}
 994  	} c;
 995  	const uint8_t tbl[] = {
 996  		// shld
 997  		0x62, 0xf4, 0xfc, 0x0c, 0xa5, 0xc8, 0x62, 0xfc, 0xfc, 0x08, 0xa5, 0xc8, 0x62, 0xfc, 0xfc, 0x08,
 998  		0x24, 0xc8, 0x09, 0x62, 0xfc, 0xfc, 0x0c, 0x24, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0xa5, 0xc8,
 999  		0x62, 0xfc, 0xdc, 0x14, 0xa5, 0xc8, 0x62, 0xfc, 0xdc, 0x10, 0x24, 0xc8, 0x09, 0x62, 0xfc, 0xdc,
1000  		0x14, 0x24, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0x24, 0x4d, 0x00, 0x09,
1001  		// shrd
1002  		0x62, 0xf4, 0xfc, 0x0c, 0xad, 0xc8, 0x62, 0xfc, 0xfc, 0x08, 0xad, 0xc8, 0x62, 0xfc, 0xfc, 0x08,
1003  		0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xfc, 0x0c, 0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0xad, 0xc8,
1004  		0x62, 0xfc, 0xdc, 0x14, 0xad, 0xc8, 0x62, 0xfc, 0xdc, 0x10, 0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xdc,
1005  		0x14, 0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0x2c, 0x4d, 0x00, 0x09,
1006  
1007  	};
1008  	const size_t n = sizeof(tbl);
1009  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1010  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1011  }
1012  
1013  CYBOZU_TEST_AUTO(base)
1014  {
1015  	struct Code : Xbyak::CodeGenerator {
1016  		Code()
1017  		{
1018  			lea(r30, ptr[r20+r21]);
1019  			add(r30, r20);
1020  			add(r30, ptr[r20]);
1021  			cmp(r30, ptr[r20]);
1022  			push(r16);
1023  			pop(r16);
1024  		}
1025  	} c;
1026  	const uint8_t tbl[] = {
1027  		// lea
1028  		0xd5, 0x7c, 0x8d, 0x34, 0x2c,
1029  		// add
1030  		0xd5, 0x59, 0x01, 0xe6,
1031  		0xd5, 0x5c, 0x03, 0x34, 0x24,
1032  		// cmp
1033  		0xd5, 0x5c, 0x3b, 0x34, 0x24,
1034  		// push
1035  		0xd5, 0x10, 0x50,
1036  		// pop
1037  		0xd5, 0x10, 0x58,
1038  	};
1039  	const size_t n = sizeof(tbl);
1040  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1041  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1042  }
1043  
1044  CYBOZU_TEST_AUTO(mov_misc)
1045  {
1046  	struct Code : Xbyak::CodeGenerator {
1047  		Code()
1048  		{
1049  			movdir64b(r16, ptr [r20+r21*8+0x4]);
1050  			movdiri(ptr [r20+r21*8+0x4], r16);
1051  
1052  			movbe(ptr [r16], r30w);
1053  			movbe(ptr [r16], r30d);
1054  			movbe(ptr [r16], r30);
1055  			movbe(r30w, ptr [r16]);
1056  			movbe(r30d, ptr [r16]);
1057  			movbe(r30, ptr [r16]);
1058  
1059  			crc32(r30d, r8b);
1060  			crc32(r30d, r8w);
1061  			crc32(r30d, r8d);
1062  			crc32(r30, r8b);
1063  			crc32(r30, r8);
1064  
1065  			jmpabs(0x12345678aabbccdd);
1066  
1067  			cmpbexadd(ptr [r20+r30*8], r21, r22);
1068  			cmpbexadd(ptr [r20+r30*8], r21d, r22d);
1069  
1070  			cmovb(r8, r9, r10);
1071  			cmovb(r8d, r9d, r10d);
1072  
1073  			setb(r31b);
1074  			setb(r31b|T_zu);
1075  			setb(r15b|T_zu);
1076  			setb(ptr [r30]);
1077  
1078  			bswap(eax);
1079  			bswap(r8d);
1080  			bswap(r16d);
1081  			bswap(rcx);
1082  			bswap(r9);
1083  			bswap(r17);
1084  		}
1085  	} c;
1086  	const uint8_t tbl[] = {
1087  		// movdir64b
1088  		0x62, 0xec, 0x79, 0x08, 0xf8, 0x44, 0xec, 0x04,
1089  		// movdiri
1090  		0x62, 0xec, 0xf8, 0x08, 0xf9, 0x44, 0xec, 0x04,
1091  		// movbe
1092  		0x62, 0x6c, 0x7d, 0x08, 0x61, 0x30, 0x62, 0x6c, 0x7c, 0x08, 0x61, 0x30, 0x62, 0x6c, 0xfc, 0x08,
1093  		0x61, 0x30, 0x62, 0x6c, 0x7d, 0x08, 0x60, 0x30, 0x62, 0x6c, 0x7c, 0x08, 0x60, 0x30, 0x62, 0x6c,
1094  		0xfc, 0x08, 0x60, 0x30,
1095  		// crc32
1096  		0x62, 0x44, 0x7c, 0x08, 0xf0, 0xf0, 0x62, 0x44, 0x7d, 0x08, 0xf1, 0xf0, 0x62, 0x44, 0x7c, 0x08,
1097  		0xf1, 0xf0, 0x62, 0x44, 0xfc, 0x08, 0xf0, 0xf0, 0x62, 0x44, 0xfc, 0x08, 0xf1, 0xf0,
1098  		// jmpabs
1099  		0xd5, 0x00, 0xa1, 0xdd, 0xcc, 0xbb, 0xaa, 0x78, 0x56, 0x34, 0x12,
1100  		//cmpbexadd
1101  		0x62, 0xaa, 0xc9, 0x00, 0xe6, 0x2c, 0xf4,
1102  		0x62, 0xaa, 0x49, 0x00, 0xe6, 0x2c, 0xf4,
1103  		// cmovb
1104  		0x62, 0x54, 0xbc, 0x18, 0x42, 0xca,
1105  		0x62, 0x54, 0x3c, 0x18, 0x42, 0xca,
1106  		// setb
1107  		0x62, 0xdc, 0x7f, 0x08, 0x42, 0xc7,
1108  		0x62, 0xdc, 0x7f, 0x18, 0x42, 0xc7,
1109  		0x62, 0xd4, 0x7f, 0x18, 0x42, 0xc7,
1110  		0x62, 0xdc, 0x7f, 0x08, 0x42, 0x06,
1111  		// bswap
1112  		0x0f, 0xc8,
1113  		0x41, 0x0f, 0xc8,
1114  		0xd5, 0x90, 0xc8,
1115  		0x48, 0x0f, 0xc9,
1116  		0x49, 0x0f, 0xc9,
1117  		0xd5, 0x98, 0xc9,
1118  	};
1119  	const size_t n = sizeof(tbl);
1120  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1121  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1122  }
1123  
1124  CYBOZU_TEST_AUTO(shift_2op)
1125  {
1126  	struct Code : Xbyak::CodeGenerator {
1127  		Code()
1128  		{
1129  			shl(r16b, cl);
1130  			shl(r16w, cl);
1131  			shl(r16d, cl);
1132  			shl(r16, cl);
1133  			shl(r16|T_nf, cl);
1134  			shl(r16b, 0x3);
1135  			shl(r16w, 0x5);
1136  			shl(r16d, 0x7);
1137  			shl(r16, 0x9);
1138  			shl(byte [r30], 0x3);
1139  			shl(word [r30], 0x5);
1140  			shl(dword [r30], 0x7);
1141  			shl(qword [r30], 0x9);
1142  
1143  			shr(r16b, cl);
1144  			shr(r16w, cl);
1145  			shr(r16d, cl);
1146  			shr(r16, cl);
1147  			shr(r16|T_nf, cl);
1148  			shr(r16b, 0x3);
1149  			shr(r16w, 0x5);
1150  			shr(r16d, 0x7);
1151  			shr(r16, 0x9);
1152  			shr(byte [r30], 0x3);
1153  			shr(word [r30], 0x5);
1154  			shr(dword [r30], 0x7);
1155  			shr(qword [r30], 0x9);
1156  
1157  			sar(r16b, cl);
1158  			sar(r16w, cl);
1159  			sar(r16d, cl);
1160  			sar(r16, cl);
1161  			sar(r16|T_nf, cl);
1162  			sar(r16b, 0x3);
1163  			sar(r16w, 0x5);
1164  			sar(r16d, 0x7);
1165  			sar(r16, 0x9);
1166  			sar(byte [r30], 0x3);
1167  			sar(word [r30], 0x5);
1168  			sar(dword [r30], 0x7);
1169  			sar(qword [r30], 0x9);
1170  
1171  			ror(r16b, cl);
1172  			ror(r16w, cl);
1173  			ror(r16d, cl);
1174  			ror(r16, cl);
1175  			ror(r16|T_nf, cl);
1176  			ror(r16b, 0x3);
1177  			ror(r16w, 0x5);
1178  			ror(r16d, 0x7);
1179  			ror(r16, 0x9);
1180  			ror(byte [r30], 0x3);
1181  			ror(word [r30], 0x5);
1182  			ror(dword [r30], 0x7);
1183  			ror(qword [r30], 0x9);
1184  
1185  			rol(r16b, cl);
1186  			rol(r16w, cl);
1187  			rol(r16d, cl);
1188  			rol(r16, cl);
1189  			rol(r16|T_nf, cl);
1190  			rol(r16b, 0x3);
1191  			rol(r16w, 0x5);
1192  			rol(r16d, 0x7);
1193  			rol(r16, 0x9);
1194  			rol(byte [r30], 0x3);
1195  			rol(word [r30], 0x5);
1196  			rol(dword [r30], 0x7);
1197  			rol(qword [r30], 0x9);
1198  
1199  			rcl(r16b, cl);
1200  			rcl(r16w, cl);
1201  			rcl(r16d, cl);
1202  			rcl(r16, cl);
1203  			rcl(r16b, 0x3);
1204  			rcl(r16w, 0x5);
1205  			rcl(r16d, 0x7);
1206  			rcl(r16, 0x9);
1207  			rcl(byte [r30], 0x3);
1208  			rcl(word [r30], 0x5);
1209  			rcl(dword [r30], 0x7);
1210  			rcl(qword [r30], 0x9);
1211  
1212  			rcr(r16b, cl);
1213  			rcr(r16w, cl);
1214  			rcr(r16d, cl);
1215  			rcr(r16, cl);
1216  			rcr(r16b, 0x3);
1217  			rcr(r16w, 0x5);
1218  			rcr(r16d, 0x7);
1219  			rcr(r16, 0x9);
1220  			rcr(byte [r30], 0x3);
1221  			rcr(word [r30], 0x5);
1222  			rcr(dword [r30], 0x7);
1223  			rcr(qword [r30], 0x9);
1224  		}
1225  	} c;
1226  	const uint8_t tbl[] = {
1227  		// shl
1228  		0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xe0, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xe0, 0x62, 0xfc, 0x7c, 0x08,
1229  		0xd3, 0xe0, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xe0, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xe0, 0x62, 0xfc,
1230  		0x7c, 0x08, 0xc0, 0xe0, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xe0, 0x05, 0x62, 0xfc, 0x7c, 0x08,
1231  		0xc1, 0xe0, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xe0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x26,
1232  		0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x26, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x26, 0x07, 0x62,
1233  		0xdc, 0xfc, 0x08, 0xc1, 0x26, 0x09,
1234  		// shr
1235  		0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xe8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xe8, 0x62, 0xfc, 0x7c, 0x08,
1236  		0xd3, 0xe8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xe8, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xe8, 0x62, 0xfc,
1237  		0x7c, 0x08, 0xc0, 0xe8, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xe8, 0x05, 0x62, 0xfc, 0x7c, 0x08,
1238  		0xc1, 0xe8, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xe8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x2e,
1239  		0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x2e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x2e, 0x07, 0x62,
1240  		0xdc, 0xfc, 0x08, 0xc1, 0x2e, 0x09,
1241  		// sar
1242  		0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xf8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xf8, 0x62, 0xfc, 0x7c, 0x08,
1243  		0xd3, 0xf8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xf8, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xf8, 0x62, 0xfc,
1244  		0x7c, 0x08, 0xc0, 0xf8, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xf8, 0x05, 0x62, 0xfc, 0x7c, 0x08,
1245  		0xc1, 0xf8, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xf8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x3e,
1246  		0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x3e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x3e, 0x07, 0x62,
1247  		0xdc, 0xfc, 0x08, 0xc1, 0x3e, 0x09,
1248  		// ror
1249  		0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xc8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xc8, 0x62, 0xfc, 0x7c, 0x08,
1250  		0xd3, 0xc8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xc8, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xc8, 0x62, 0xfc,
1251  		0x7c, 0x08, 0xc0, 0xc8, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xc8, 0x05, 0x62, 0xfc, 0x7c, 0x08,
1252  		0xc1, 0xc8, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xc8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x0e,
1253  		0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x0e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x0e, 0x07, 0x62,
1254  		0xdc, 0xfc, 0x08, 0xc1, 0x0e, 0x09,
1255  		// rol
1256  		0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xc0, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xc0, 0x62, 0xfc, 0x7c, 0x08,
1257  		0xd3, 0xc0, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xc0, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xc0, 0x62, 0xfc,
1258  		0x7c, 0x08, 0xc0, 0xc0, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xc0, 0x05, 0x62, 0xfc, 0x7c, 0x08,
1259  		0xc1, 0xc0, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xc0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x06,
1260  		0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x06, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x06, 0x07, 0x62,
1261  		0xdc, 0xfc, 0x08, 0xc1, 0x06, 0x09,
1262  		// rcl
1263  		0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xd0, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xd0, 0x62, 0xfc, 0x7c, 0x08,
1264  		0xd3, 0xd0, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xd0, 0x62, 0xfc, 0x7c, 0x08, 0xc0, 0xd0, 0x03, 0x62,
1265  		0xfc, 0x7d, 0x08, 0xc1, 0xd0, 0x05, 0x62, 0xfc, 0x7c, 0x08, 0xc1, 0xd0, 0x07, 0x62, 0xfc, 0xfc,
1266  		0x08, 0xc1, 0xd0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x16, 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1,
1267  		0x16, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x16, 0x07, 0x62, 0xdc, 0xfc, 0x08, 0xc1, 0x16, 0x09,
1268  		// rcr
1269  		0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xd8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xd8, 0x62, 0xfc, 0x7c, 0x08,
1270  		0xd3, 0xd8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xd8, 0x62, 0xfc, 0x7c, 0x08, 0xc0, 0xd8, 0x03, 0x62,
1271  		0xfc, 0x7d, 0x08, 0xc1, 0xd8, 0x05, 0x62, 0xfc, 0x7c, 0x08, 0xc1, 0xd8, 0x07, 0x62, 0xfc, 0xfc,
1272  		0x08, 0xc1, 0xd8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x1e, 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1,
1273  		0x1e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x1e, 0x07, 0x62, 0xdc, 0xfc, 0x08, 0xc1, 0x1e, 0x09,
1274  	};
1275  	const size_t n = sizeof(tbl);
1276  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1277  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1278  }
1279  
1280  CYBOZU_TEST_AUTO(shift_3op)
1281  {
1282  	struct Code : Xbyak::CodeGenerator {
1283  		Code()
1284  		{
1285  			rcl(r20b, r16b, cl);
1286  			rcl(r20w, r16w, cl);
1287  			rcl(r20d, r16d, cl);
1288  			rcl(r20, r16, cl);
1289  			rcl(r20b, ptr [r16], cl);
1290  			rcl(r20w, ptr [r16], cl);
1291  			rcl(r20d, ptr [r16], cl);
1292  			rcl(r20, ptr [r16], cl);
1293  			rcl(r20b, r16b, 0x2);
1294  			rcl(r20w, r16w, 0x4);
1295  			rcl(r20d, r16d, 0x6);
1296  			rcl(r20, r16, 0x8);
1297  			rcl(r20b, ptr [r16], 0x2);
1298  			rcl(r20w, ptr [r16], 0x4);
1299  			rcl(r20d, ptr [r16], 0x6);
1300  			rcl(r20, ptr [r16], 0x8);
1301  
1302  			rcr(r20b, r16b, cl);
1303  			rcr(r20w, r16w, cl);
1304  			rcr(r20d, r16d, cl);
1305  			rcr(r20, r16, cl);
1306  			rcr(r20b, ptr [r16], cl);
1307  			rcr(r20w, ptr [r16], cl);
1308  			rcr(r20d, ptr [r16], cl);
1309  			rcr(r20, ptr [r16], cl);
1310  			rcr(r20b, r16b, 0x2);
1311  			rcr(r20w, r16w, 0x4);
1312  			rcr(r20d, r16d, 0x6);
1313  			rcr(r20, r16, 0x8);
1314  			rcr(r20b, ptr [r16], 0x2);
1315  			rcr(r20w, ptr [r16], 0x4);
1316  			rcr(r20d, ptr [r16], 0x6);
1317  			rcr(r20, ptr [r16], 0x8);
1318  
1319  			rol(r20b, r16b, cl);
1320  			rol(r20w, r16w, cl);
1321  			rol(r20d, r16d, cl);
1322  			rol(r20, r16, cl);
1323  			rol(r20b, ptr [r16], cl);
1324  			rol(r20w, ptr [r16], cl);
1325  			rol(r20d, ptr [r16], cl);
1326  			rol(r20, ptr [r16], cl);
1327  			rol(r20b, r16b, 0x2);
1328  			rol(r20w, r16w, 0x4);
1329  			rol(r20d, r16d, 0x6);
1330  			rol(r20, r16, 0x8);
1331  			rol(r20b, ptr [r16], 0x2);
1332  			rol(r20w, ptr [r16], 0x4);
1333  			rol(r20d, ptr [r16], 0x6);
1334  			rol(r20, ptr [r16], 0x8);
1335  
1336  			shl(r20b, r16b, cl);
1337  			shl(r20w, r16w, cl);
1338  			shl(r20d, r16d, cl);
1339  			shl(r20, r16, cl);
1340  			shl(r20b, ptr [r16], cl);
1341  			shl(r20w, ptr [r16], cl);
1342  			shl(r20d, ptr [r16], cl);
1343  			shl(r20, ptr [r16], cl);
1344  			shl(r20b, r16b, 0x2);
1345  			shl(r20w, r16w, 0x4);
1346  			shl(r20d, r16d, 0x6);
1347  			shl(r20, r16, 0x8);
1348  			shl(r20b, ptr [r16], 0x2);
1349  			shl(r20w, ptr [r16], 0x4);
1350  			shl(r20d, ptr [r16], 0x6);
1351  			shl(r20, ptr [r16], 0x8);
1352  
1353  			shr(r20b, r16b, cl);
1354  			shr(r20w, r16w, cl);
1355  			shr(r20d, r16d, cl);
1356  			shr(r20, r16, cl);
1357  			shr(r20b, ptr [r16], cl);
1358  			shr(r20w, ptr [r16], cl);
1359  			shr(r20d, ptr [r16], cl);
1360  			shr(r20, ptr [r16], cl);
1361  			shr(r20b, r16b, 0x2);
1362  			shr(r20w, r16w, 0x4);
1363  			shr(r20d, r16d, 0x6);
1364  			shr(r20, r16, 0x8);
1365  			shr(r20b, ptr [r16], 0x2);
1366  			shr(r20w, ptr [r16], 0x4);
1367  			shr(r20d, ptr [r16], 0x6);
1368  			shr(r20, ptr [r16], 0x8);
1369  
1370  			sar(r20b, r16b, cl);
1371  			sar(r20w, r16w, cl);
1372  			sar(r20d, r16d, cl);
1373  			sar(r20, r16, cl);
1374  			sar(r20b, ptr [r16], cl);
1375  			sar(r20w, ptr [r16], cl);
1376  			sar(r20d, ptr [r16], cl);
1377  			sar(r20, ptr [r16], cl);
1378  			sar(r20b, r16b, 0x2);
1379  			sar(r20w, r16w, 0x4);
1380  			sar(r20d, r16d, 0x6);
1381  			sar(r20, r16, 0x8);
1382  			sar(r20b, ptr [r16], 0x2);
1383  			sar(r20w, ptr [r16], 0x4);
1384  			sar(r20d, ptr [r16], 0x6);
1385  			sar(r20, ptr [r16], 0x8);
1386  		}
1387  	} c;
1388  	const uint8_t tbl[] = {
1389  		// rcl
1390  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xd0, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xd0, 0x62, 0xfc, 0x5c, 0x10,
1391  		0xd3, 0xd0, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xd0, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x10, 0x62, 0xfc,
1392  		0x5d, 0x10, 0xd3, 0x10, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x10, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x10,
1393  		0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xd0, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xd0, 0x04, 0x62, 0xfc,
1394  		0x5c, 0x10, 0xc1, 0xd0, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xd0, 0x08, 0x62, 0xfc, 0x5c, 0x10,
1395  		0xc0, 0x10, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x10, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x10,
1396  		0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x10, 0x08,
1397  		// rcr
1398  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xd8, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xd8, 0x62, 0xfc, 0x5c, 0x10,
1399  		0xd3, 0xd8, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xd8, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x18, 0x62, 0xfc,
1400  		0x5d, 0x10, 0xd3, 0x18, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x18, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x18,
1401  		0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xd8, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xd8, 0x04, 0x62, 0xfc,
1402  		0x5c, 0x10, 0xc1, 0xd8, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xd8, 0x08, 0x62, 0xfc, 0x5c, 0x10,
1403  		0xc0, 0x18, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x18, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x18,
1404  		0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x18, 0x08,
1405  		// rol
1406  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xc0, 0x62, 0xfc,
1407  		0x5d, 0x10, 0xd3, 0xc0, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0xc0, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xc0,
1408  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x00, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0x00, 0x62, 0xfc, 0x5c, 0x10,
1409  		0xd3, 0x00, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x00, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xc0, 0x02, 0x62,
1410  		0xfc, 0x5d, 0x10, 0xc1, 0xc0, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0xc0, 0x06, 0x62, 0xfc, 0xdc,
1411  		0x10, 0xc1, 0xc0, 0x08, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0x00, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1,
1412  		0x00, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x00, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x00, 0x08,
1413  		// shl
1414  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xe0, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xe0, 0x62, 0xfc, 0x5c, 0x10,
1415  		0xd3, 0xe0, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xe0, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x20, 0x62, 0xfc,
1416  		0x5d, 0x10, 0xd3, 0x20, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x20, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x20,
1417  		0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xe0, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xe0, 0x04, 0x62, 0xfc,
1418  		0x5c, 0x10, 0xc1, 0xe0, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xe0, 0x08, 0x62, 0xfc, 0x5c, 0x10,
1419  		0xc0, 0x20, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x20, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x20,
1420  		0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x20, 0x08,
1421  		// shr
1422  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xe8, 0x62, 0xfc,
1423  		0x5d, 0x10, 0xd3, 0xe8, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0xe8, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xe8,
1424  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x28, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0x28, 0x62, 0xfc, 0x5c, 0x10,
1425  		0xd3, 0x28, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x28, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xe8, 0x02, 0x62,
1426  		0xfc, 0x5d, 0x10, 0xc1, 0xe8, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0xe8, 0x06, 0x62, 0xfc, 0xdc,
1427  		0x10, 0xc1, 0xe8, 0x08, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0x28, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1,
1428  		0x28, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x28, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x28, 0x08,
1429  		// sar
1430  		0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xf8, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xf8, 0x62, 0xfc, 0x5c, 0x10,
1431  		0xd3, 0xf8, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xf8, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x38, 0x62, 0xfc,
1432  		0x5d, 0x10, 0xd3, 0x38, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x38, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x38,
1433  		0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xf8, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xf8, 0x04, 0x62, 0xfc,
1434  		0x5c, 0x10, 0xc1, 0xf8, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xf8, 0x08, 0x62, 0xfc, 0x5c, 0x10,
1435  		0xc0, 0x38, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x38, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x38,
1436  		0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x38, 0x08,
1437  	};
1438  	const size_t n = sizeof(tbl);
1439  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1440  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1441  }
1442  
1443  CYBOZU_TEST_AUTO(push2_pop2)
1444  {
1445  	struct Code : Xbyak::CodeGenerator {
1446  		Code()
1447  		{
1448  			push2(r20, r30);
1449  			push2(rax, rcx);
1450  			push2p(r20, r30);
1451  			push2p(rdx, r8);
1452  
1453  			pop2(rax, rcx);
1454  			pop2(r20, r30);
1455  			pop2p(rax, rcx);
1456  			pop2p(r20, r30);
1457  		}
1458  	} c;
1459  	const uint8_t tbl[] = {
1460  		// push2
1461  		0x62, 0xdc, 0x5c, 0x10, 0xff, 0xf6,
1462  		0x62, 0xf4, 0x7c, 0x18, 0xff, 0xf1,
1463  		// push2p (What is this?)
1464  		0x62, 0xdc, 0xdc, 0x10, 0xff, 0xf6,
1465  		0x62, 0xd4, 0xec, 0x18, 0xff, 0xf0,
1466  		// pop2
1467  		0x62, 0xf4, 0x7c, 0x18, 0x8f, 0xc1,
1468  		0x62, 0xdc, 0x5c, 0x10, 0x8f, 0xc6,
1469  		// pop2p
1470  		0x62, 0xf4, 0xfc, 0x18, 0x8f, 0xc1,
1471  		0x62, 0xdc, 0xdc, 0x10, 0x8f, 0xc6,
1472  	};
1473  	const size_t n = sizeof(tbl);
1474  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1475  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1476  }
1477  
1478  CYBOZU_TEST_AUTO(ccmp)
1479  {
1480  	struct Code : Xbyak::CodeGenerator {
1481  		Code()
1482  		{
1483  			ccmpb(rax, rbx, 0);
1484  			ccmpb(r30b, r31b, 1);
1485  			ccmpb(r30w, r31w, 2);
1486  			ccmpb(r30d, r31d, 3);
1487  			ccmpb(r30, r31, 4);
1488  			ccmpb(ptr [r30], r31b, 5);
1489  			ccmpb(ptr [r30], r31w, 6);
1490  			ccmpb(ptr [r30], r31d, 7);
1491  			ccmpb(ptr [r30], r31, 8);
1492  			ccmpb(r31b, ptr [r30], 9);
1493  			ccmpb(r31w, ptr [r30], 10);
1494  			ccmpb(r31d, ptr [r30], 11);
1495  			ccmpb(r31, ptr [r30], 12);
1496  
1497  			ccmpb(r20b, 0x12, 9);
1498  			ccmpb(r20w, 0x1234, 9);
1499  			ccmpb(r20d, 0x12345678, 9);
1500  			ccmpb(r20, 0x12345678, 9);
1501  			ccmpb(byte [r20], 0x12, 9);
1502  			ccmpb(word [r20], 0x1234, 9);
1503  			ccmpb(dword [r20], 0x12345678, 9);
1504  			ccmpb(qword [r20], 0x12345678, 9);
1505  
1506  			ccmpo(rax, rcx, 0);
1507  			ccmpno(rax, rcx, 1);
1508  			ccmpb(rax, rcx, 2);
1509  			ccmpnb(rax, rcx, 3);
1510  			ccmpz(rax, rcx, 4);
1511  			ccmpnz(rax, rcx, 5);
1512  			ccmpbe(rax, rcx, 6);
1513  			ccmpnbe(rax, rcx, 7);
1514  			ccmps(rax, rcx, 8);
1515  			ccmpns(rax, rcx, 9);
1516  			ccmpt(rax, rcx, 10);
1517  			ccmpf(rax, rcx, 11);
1518  			ccmpl(rax, rcx, 12);
1519  			ccmpnl(rax, rcx, 13);
1520  			ccmple(rax, rcx, 14);
1521  			ccmpnle(rax, rcx, 15);
1522  		}
1523  	} c;
1524  	const uint8_t tbl[] = {
1525  		// ccmpb
1526  		0x62, 0xf4, 0x84, 0x02, 0x39, 0xd8, 0x62, 0x4c, 0x0c, 0x02, 0x38, 0xfe, 0x62, 0x4c, 0x15, 0x02,
1527  		0x39, 0xfe, 0x62, 0x4c, 0x1c, 0x02, 0x39, 0xfe, 0x62, 0x4c, 0xa4, 0x02, 0x39, 0xfe, 0x62, 0x4c,
1528  		0x2c, 0x02, 0x38, 0x3e, 0x62, 0x4c, 0x35, 0x02, 0x39, 0x3e, 0x62, 0x4c, 0x3c, 0x02, 0x39, 0x3e,
1529  		0x62, 0x4c, 0xc4, 0x02, 0x39, 0x3e, 0x62, 0x4c, 0x4c, 0x02, 0x3a, 0x3e, 0x62, 0x4c, 0x55, 0x02,
1530  		0x3b, 0x3e, 0x62, 0x4c, 0x5c, 0x02, 0x3b, 0x3e, 0x62, 0x4c, 0xe4, 0x02, 0x3b, 0x3e,
1531  		// ccmpb imm
1532  		0x62, 0x7c, 0x4c, 0x02, 0x80, 0xfc, 0x12, 0x62, 0x7c, 0x4d, 0x02, 0x81, 0xfc, 0x34, 0x12, 0x62,
1533  		0x7c, 0x4c, 0x02, 0x81, 0xfc, 0x78, 0x56, 0x34, 0x12, 0x62, 0x7c, 0xcc, 0x02, 0x81, 0xfc, 0x78,
1534  		0x56, 0x34, 0x12, 0x62, 0x7c, 0x4c, 0x02, 0x80, 0x3c, 0x24, 0x12, 0x62, 0x7c, 0x4d, 0x02, 0x81,
1535  		0x3c, 0x24, 0x34, 0x12, 0x62, 0x7c, 0x4c, 0x02, 0x81, 0x3c, 0x24, 0x78, 0x56, 0x34, 0x12, 0x62,
1536  		0x7c, 0xcc, 0x02, 0x81, 0x3c, 0x24, 0x78, 0x56, 0x34, 0x12,
1537  		// all
1538  		0x62, 0xf4, 0x84, 0x00, 0x39, 0xc8, 0x62, 0xf4, 0x8c, 0x01, 0x39, 0xc8, 0x62, 0xf4, 0x94, 0x02,
1539  		0x39, 0xc8, 0x62, 0xf4, 0x9c, 0x03, 0x39, 0xc8, 0x62, 0xf4, 0xa4, 0x04, 0x39, 0xc8, 0x62, 0xf4,
1540  		0xac, 0x05, 0x39, 0xc8, 0x62, 0xf4, 0xb4, 0x06, 0x39, 0xc8, 0x62, 0xf4, 0xbc, 0x07, 0x39, 0xc8,
1541  		0x62, 0xf4, 0xc4, 0x08, 0x39, 0xc8, 0x62, 0xf4, 0xcc, 0x09, 0x39, 0xc8, 0x62, 0xf4, 0xd4, 0x0a,
1542  		0x39, 0xc8, 0x62, 0xf4, 0xdc, 0x0b, 0x39, 0xc8, 0x62, 0xf4, 0xe4, 0x0c, 0x39, 0xc8, 0x62, 0xf4,
1543  		0xec, 0x0d, 0x39, 0xc8, 0x62, 0xf4, 0xf4, 0x0e, 0x39, 0xc8, 0x62, 0xf4, 0xfc, 0x0f, 0x39, 0xc8,
1544  	};
1545  	const size_t n = sizeof(tbl);
1546  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1547  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1548  }
1549  
1550  CYBOZU_TEST_AUTO(ctestb)
1551  {
1552  	struct Code : Xbyak::CodeGenerator {
1553  		Code()
1554  		{
1555  			ctestb(r30b, r31b, 0);
1556  			ctestb(r30w, r31w, 1);
1557  			ctestb(r30d, r31d, 2);
1558  			ctestb(r30, r31, 3);
1559  
1560  			ctestb(ptr [r30], r31b, 4);
1561  			ctestb(ptr [r30], r31w, 5);
1562  			ctestb(ptr [r30], r31d, 6);
1563  			ctestb(ptr [r30], r31, 7);
1564  
1565  			ctestb(r30b, 0x12, 8);
1566  			ctestb(r30w, 0x1234, 9);
1567  			ctestb(r30d, 0x12345678, 10);
1568  			ctestb(r30, 0x12345678, 11);
1569  
1570  			ctestb(byte [r30], 0x12, 12);
1571  			ctestb(word [r30], 0x1234, 13);
1572  			ctestb(dword [r30], 0x12345678, 14);
1573  			ctestb(qword [r30], 0x12345678, 15);
1574  
1575  			// all
1576  			ctesto(rax, rcx, 0);
1577  			ctestno(rax, rcx, 1);
1578  			ctestb(rax, rcx, 2);
1579  			ctestnb(rax, rcx, 3);
1580  			ctestz(rax, rcx, 4);
1581  			ctestnz(rax, rcx, 5);
1582  			ctestbe(rax, rcx, 6);
1583  			ctestnbe(rax, rcx, 7);
1584  			ctests(rax, rcx, 8);
1585  			ctestns(rax, rcx, 9);
1586  			ctestt(rax, rcx, 10);
1587  			ctestf(rax, rcx, 11);
1588  			ctestl(rax, rcx, 12);
1589  			ctestnl(rax, rcx, 13);
1590  			ctestle(rax, rcx, 14);
1591  			ctestnle(rax, rcx, 15);
1592  		}
1593  	} c;
1594  	const uint8_t tbl[] = {
1595  		// ctestb
1596  		0x62, 0x4c, 0x04, 0x02, 0x84, 0xfe, 0x62, 0x4c, 0x0d, 0x02, 0x85, 0xfe, 0x62, 0x4c, 0x14, 0x02,
1597  		0x85, 0xfe, 0x62, 0x4c, 0x9c, 0x02, 0x85, 0xfe, 0x62, 0x4c, 0x24, 0x02, 0x84, 0x3e, 0x62, 0x4c,
1598  		0x2d, 0x02, 0x85, 0x3e, 0x62, 0x4c, 0x34, 0x02, 0x85, 0x3e, 0x62, 0x4c, 0xbc, 0x02, 0x85, 0x3e,
1599  		0x62, 0xdc, 0x44, 0x02, 0xf6, 0xc6, 0x12, 0x62, 0xdc, 0x4d, 0x02, 0xf7, 0xc6, 0x34, 0x12, 0x62,
1600  		0xdc, 0x54, 0x02, 0xf7, 0xc6, 0x78, 0x56, 0x34, 0x12, 0x62, 0xdc, 0xdc, 0x02, 0xf7, 0xc6, 0x78,
1601  		0x56, 0x34, 0x12, 0x62, 0xdc, 0x64, 0x02, 0xf6, 0x06, 0x12, 0x62, 0xdc, 0x6d, 0x02, 0xf7, 0x06,
1602  		0x34, 0x12, 0x62, 0xdc, 0x74, 0x02, 0xf7, 0x06, 0x78, 0x56, 0x34, 0x12, 0x62, 0xdc, 0xfc, 0x02,
1603  		0xf7, 0x06, 0x78, 0x56, 0x34, 0x12,
1604  		// all
1605  		0x62, 0xf4, 0x84, 0x00, 0x85, 0xc8, 0x62, 0xf4, 0x8c, 0x01, 0x85, 0xc8, 0x62, 0xf4, 0x94, 0x02,
1606  		0x85, 0xc8, 0x62, 0xf4, 0x9c, 0x03, 0x85, 0xc8, 0x62, 0xf4, 0xa4, 0x04, 0x85, 0xc8, 0x62, 0xf4,
1607  		0xac, 0x05, 0x85, 0xc8, 0x62, 0xf4, 0xb4, 0x06, 0x85, 0xc8, 0x62, 0xf4, 0xbc, 0x07, 0x85, 0xc8,
1608  		0x62, 0xf4, 0xc4, 0x08, 0x85, 0xc8, 0x62, 0xf4, 0xcc, 0x09, 0x85, 0xc8, 0x62, 0xf4, 0xd4, 0x0a,
1609  		0x85, 0xc8, 0x62, 0xf4, 0xdc, 0x0b, 0x85, 0xc8, 0x62, 0xf4, 0xe4, 0x0c, 0x85, 0xc8, 0x62, 0xf4,
1610  		0xec, 0x0d, 0x85, 0xc8, 0x62, 0xf4, 0xf4, 0x0e, 0x85, 0xc8, 0x62, 0xf4, 0xfc, 0x0f, 0x85, 0xc8,
1611  	};
1612  	const size_t n = sizeof(tbl);
1613  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1614  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1615  }
1616  
1617  CYBOZU_TEST_AUTO(cfcmov)
1618  {
1619  	struct Code : Xbyak::CodeGenerator {
1620  		Code()
1621  		{
1622  			cfcmovb(r30w, r31w);
1623  			cfcmovb(r30d, r31d);
1624  			cfcmovb(r30, r31);
1625  			cfcmovb(ptr [r8+r20*4+0x3], r19w);
1626  			cfcmovb(ptr [r8+r20*4+0x3], r19d);
1627  			cfcmovb(ptr [r8+r20*4+0x3], r19);
1628  			cfcmovb(r30w, ptr [r9]);
1629  			cfcmovb(r30d, ptr [r9]);
1630  			cfcmovb(r30, ptr [r9]);
1631  			cfcmovb(r20w, r30w, r31w);
1632  			cfcmovb(r20d, r30d, r31d);
1633  			cfcmovb(r20, r30, r31);
1634  			cfcmovb(r20w, r30w, ptr [r9]);
1635  			cfcmovb(r20d, r30d, ptr [r9]);
1636  			cfcmovb(r20, r30, ptr [r9]);
1637  
1638  			// all
1639  			cfcmovo(r20, r21, r22);
1640  			cfcmovo(r20, r21, ptr [r22]);
1641  			cfcmovno(r20, r21, r22);
1642  			cfcmovno(r20, r21, ptr [r22]);
1643  			cfcmovb(r20, r21, r22);
1644  			cfcmovb(r20, r21, ptr [r22]);
1645  			cfcmovnb(r20, r21, r22);
1646  			cfcmovnb(r20, r21, ptr [r22]);
1647  			cfcmovz(r20, r21, r22);
1648  			cfcmovz(r20, r21, ptr [r22]);
1649  			cfcmovnz(r20, r21, r22);
1650  			cfcmovnz(r20, r21, ptr [r22]);
1651  			cfcmovbe(r20, r21, r22);
1652  			cfcmovbe(r20, r21, ptr [r22]);
1653  			cfcmovnbe(r20, r21, r22);
1654  			cfcmovnbe(r20, r21, ptr [r22]);
1655  			cfcmovs(r20, r21, r22);
1656  			cfcmovs(r20, r21, ptr [r22]);
1657  			cfcmovns(r20, r21, r22);
1658  			cfcmovns(r20, r21, ptr [r22]);
1659  			cfcmovp(r20, r21, r22);
1660  			cfcmovp(r20, r21, ptr [r22]);
1661  			cfcmovnp(r20, r21, r22);
1662  			cfcmovnp(r20, r21, ptr [r22]);
1663  			cfcmovl(r20, r21, r22);
1664  			cfcmovl(r20, r21, ptr [r22]);
1665  			cfcmovnl(r20, r21, r22);
1666  			cfcmovnl(r20, r21, ptr [r22]);
1667  			cfcmovle(r20, r21, r22);
1668  			cfcmovle(r20, r21, ptr [r22]);
1669  			cfcmovnle(r20, r21, r22);
1670  			cfcmovnle(r20, r21, ptr [r22]);
1671  
1672  		}
1673  	} c;
1674  	const uint8_t tbl[] = {
1675  		0x62, 0x4c, 0x7d, 0x0c, 0x42, 0xfe, 0x62, 0x4c, 0x7c, 0x0c, 0x42, 0xfe, 0x62, 0x4c, 0xfc, 0x0c,
1676  		0x42, 0xfe, 0x62, 0xc4, 0x79, 0x0c, 0x42, 0x5c, 0xa0, 0x03, 0x62, 0xc4, 0x78, 0x0c, 0x42, 0x5c,
1677  		0xa0, 0x03, 0x62, 0xc4, 0xf8, 0x0c, 0x42, 0x5c, 0xa0, 0x03, 0x62, 0x44, 0x7d, 0x08, 0x42, 0x31,
1678  		0x62, 0x44, 0x7c, 0x08, 0x42, 0x31, 0x62, 0x44, 0xfc, 0x08, 0x42, 0x31, 0x62, 0x4c, 0x5d, 0x14,
1679  		0x42, 0xf7, 0x62, 0x4c, 0x5c, 0x14, 0x42, 0xf7, 0x62, 0x4c, 0xdc, 0x14, 0x42, 0xf7, 0x62, 0x44,
1680  		0x5d, 0x14, 0x42, 0x31, 0x62, 0x44, 0x5c, 0x14, 0x42, 0x31, 0x62, 0x44, 0xdc, 0x14, 0x42, 0x31,
1681  		// all
1682  		0x62, 0xec, 0xdc, 0x14, 0x40, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x40, 0x2e, 0x62, 0xec, 0xdc, 0x14,
1683  		0x41, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x41, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x42, 0xee, 0x62, 0xec,
1684  		0xdc, 0x14, 0x42, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x43, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x43, 0x2e,
1685  		0x62, 0xec, 0xdc, 0x14, 0x44, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x44, 0x2e, 0x62, 0xec, 0xdc, 0x14,
1686  		0x45, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x45, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x46, 0xee, 0x62, 0xec,
1687  		0xdc, 0x14, 0x46, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x47, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x47, 0x2e,
1688  		0x62, 0xec, 0xdc, 0x14, 0x48, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x48, 0x2e, 0x62, 0xec, 0xdc, 0x14,
1689  		0x49, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x49, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4a, 0xee, 0x62, 0xec,
1690  		0xdc, 0x14, 0x4a, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4b, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4b, 0x2e,
1691  		0x62, 0xec, 0xdc, 0x14, 0x4c, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4c, 0x2e, 0x62, 0xec, 0xdc, 0x14,
1692  		0x4d, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4d, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4e, 0xee, 0x62, 0xec,
1693  		0xdc, 0x14, 0x4e, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4f, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4f, 0x2e,
1694  	};
1695  	const size_t n = sizeof(tbl);
1696  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1697  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1698  }
1699  
1700  CYBOZU_TEST_AUTO(evex_misc)
1701  {
1702  	struct Code : Xbyak::CodeGenerator {
1703  		Code()
1704  		{
1705  			vmovaps(xmm31, ptr [r30+r26*8+0x40]);
1706  			vaddps(zmm30, zmm21, ptr [r20+r30*1]);
1707  			vcvtsd2si(r30d, ptr [r17+r31*4]);
1708  
1709  			test(ptr[r30], r31);
1710  			test(byte[r30], 0x12);
1711  			call(r20);
1712  			call(ptr[r20]);
1713  		}
1714  	} c;
1715  	const uint8_t tbl[] = {
1716  		0x62, 0x09, 0x78, 0x08, 0x28, 0x7c, 0xd6, 0x04,
1717  		0x62, 0x29, 0x50, 0x40, 0x58, 0x34, 0x34, 0x62,
1718  		0x29, 0x7b, 0x08, 0x2d, 0x34, 0xb9,
1719  
1720  		0xd5, 0x5d, 0x85, 0x3e,
1721  		0xd5, 0x11, 0xf6, 0x06, 0x12,
1722  		0xd5, 0x10, 0xff, 0xd4,
1723  		0xd5, 0x10, 0xff, 0x14, 0x24,
1724  	};
1725  	const size_t n = sizeof(tbl);
1726  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1727  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1728  }
1729  
1730  CYBOZU_TEST_AUTO(kmov)
1731  {
1732  	struct Code : Xbyak::CodeGenerator {
1733  		Code()
1734  		{
1735  			kmovb(k1, ptr [r20]);
1736  			kmovb(k2, r21d);
1737  			kmovb(ptr [r22], k3);
1738  			kmovb(r23d, k4);
1739  
1740  			kmovw(k1, ptr [r20]);
1741  			kmovw(k2, r21d);
1742  			kmovw(ptr [r22], k3);
1743  			kmovw(r23d, k4);
1744  
1745  			kmovd(k1, ptr [r20]);
1746  			kmovd(k2, r21d);
1747  			kmovd(ptr [r22], k3);
1748  			kmovd(r23d, k4);
1749  
1750  			kmovq(k1, ptr [r20]);
1751  			kmovq(k2, r21);
1752  			kmovq(ptr [r22], k3);
1753  			kmovq(r23, k4);
1754  		}
1755  	} c;
1756  	const uint8_t tbl[] = {
1757  		0x62, 0xf9, 0x7d, 0x08, 0x90, 0x0c, 0x24, 0x62, 0xf9, 0x7d, 0x08, 0x92, 0xd5, 0x62, 0xf9, 0x7d,
1758  		0x08, 0x91, 0x1e, 0x62, 0xe1, 0x7d, 0x08, 0x93, 0xfc, 0x62, 0xf9, 0x7c, 0x08, 0x90, 0x0c, 0x24,
1759  		0x62, 0xf9, 0x7c, 0x08, 0x92, 0xd5, 0x62, 0xf9, 0x7c, 0x08, 0x91, 0x1e, 0x62, 0xe1, 0x7c, 0x08,
1760  		0x93, 0xfc, 0x62, 0xf9, 0xfd, 0x08, 0x90, 0x0c, 0x24, 0x62, 0xf9, 0x7f, 0x08, 0x92, 0xd5, 0x62,
1761  		0xf9, 0xfd, 0x08, 0x91, 0x1e, 0x62, 0xe1, 0x7f, 0x08, 0x93, 0xfc, 0x62, 0xf9, 0xfc, 0x08, 0x90,
1762  		0x0c, 0x24, 0x62, 0xf9, 0xff, 0x08, 0x92, 0xd5, 0x62, 0xf9, 0xfc, 0x08, 0x91, 0x1e, 0x62, 0xe1,
1763  		0xff, 0x08, 0x93, 0xfc,
1764  	};
1765  	const size_t n = sizeof(tbl);
1766  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1767  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1768  }
1769  
1770  CYBOZU_TEST_AUTO(amx)
1771  {
1772  	struct Code : Xbyak::CodeGenerator {
1773  		Code()
1774  		{
1775  			ldtilecfg(ptr [r30+r29*4+0x12]);
1776  			sttilecfg(ptr [r30+r29*4+0x12]);
1777  			tileloadd(tmm1, ptr [r30+r29*4+0x12]);
1778  			tileloaddt1(tmm3, ptr [r30+r29*4+0x12]);
1779  			tilestored(ptr [r30+r29*4+0x12], tmm5);
1780  		}
1781  	} c;
1782  	const uint8_t tbl[] = {
1783  		0x62, 0x9a, 0x78, 0x08, 0x49, 0x44, 0xae, 0x12, 0x62, 0x9a, 0x79, 0x08, 0x49, 0x44, 0xae, 0x12,
1784  		0x62, 0x9a, 0x7b, 0x08, 0x4b, 0x4c, 0xae, 0x12, 0x62, 0x9a, 0x79, 0x08, 0x4b, 0x5c, 0xae, 0x12,
1785  		0x62, 0x9a, 0x7a, 0x08, 0x4b, 0x6c, 0xae, 0x12,
1786  	};
1787  	const size_t n = sizeof(tbl);
1788  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1789  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1790  }
1791  
1792  CYBOZU_TEST_AUTO(aeskl)
1793  {
1794  	struct Code : Xbyak::CodeGenerator {
1795  		Code()
1796  		{
1797  			aesdec128kl(xmm15, ptr[rax+rcx*4+0x12]);
1798  			aesdec128kl(xmm15, ptr[r30+r29*8+0x34]);
1799  
1800  			aesdec256kl(xmm15, ptr[rax+rcx*4+0x12]);
1801  			aesdec256kl(xmm15, ptr[r30+r29*8+0x34]);
1802  
1803  			aesdecwide128kl(ptr[rax+rcx*4+0x12]);
1804  			aesdecwide128kl(ptr[r30+r29*8+0x34]);
1805  
1806  			aesdecwide256kl(ptr[rax+rcx*4+0x12]);
1807  			aesdecwide256kl(ptr[r30+r29*8+0x34]);
1808  
1809  			aesenc128kl(xmm15, ptr[rax+rcx*4+0x12]);
1810  			aesenc128kl(xmm15, ptr[r30+r29*8+0x34]);
1811  
1812  			aesenc256kl(xmm15, ptr[rax+rcx*4+0x12]);
1813  			aesenc256kl(xmm15, ptr[r30+r29*8+0x34]);
1814  
1815  			aesencwide128kl(ptr[rax+rcx*4+0x12]);
1816  			aesencwide128kl(ptr[r30+r29*8+0x34]);
1817  
1818  			aesencwide256kl(ptr[rax+rcx*4+0x12]);
1819  			aesencwide256kl(ptr[r30+r29*8+0x34]);
1820  		}
1821  	} c;
1822  	const uint8_t tbl[] = {
1823  		// aesdec128kl
1824  		0xf3, 0x44, 0x0f, 0x38, 0xdd, 0x7c, 0x88, 0x12,
1825  		0x62, 0x1c, 0x7a, 0x08, 0xdd, 0x7c, 0xee, 0x34,
1826  		// aesdec256kl
1827  		0xf3, 0x44, 0x0f, 0x38, 0xdf, 0x7c, 0x88, 0x12,
1828  		0x62, 0x1c, 0x7a, 0x08, 0xdf, 0x7c, 0xee, 0x34,
1829  		// aesdecwide128kl
1830  		0xf3, 0x0f, 0x38, 0xd8, 0x4c, 0x88, 0x12,
1831  		0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x4c, 0xee, 0x34, 0xf3,
1832  		// aesdecwide256kl
1833  		0x0f, 0x38, 0xd8, 0x5c, 0x88, 0x12,
1834  		0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x5c, 0xee, 0x34,
1835  		// aesenc128kl
1836  		0xf3, 0x44, 0x0f, 0x38, 0xdc, 0x7c, 0x88, 0x12,
1837  		0x62, 0x1c, 0x7a, 0x08, 0xdc, 0x7c, 0xee, 0x34,
1838  		// aesenc256kl
1839  		0xf3, 0x44, 0x0f, 0x38, 0xde, 0x7c, 0x88, 0x12,
1840  		0x62, 0x1c, 0x7a, 0x08, 0xde, 0x7c, 0xee, 0x34,
1841  		// aesencwide128kl
1842  		0xf3, 0x0f, 0x38, 0xd8, 0x44, 0x88, 0x12,
1843  		0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x44, 0xee, 0x34,
1844  		// aesencwide256kl
1845  		0xf3, 0x0f, 0x38, 0xd8, 0x54, 0x88, 0x12,
1846  		0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x54, 0xee, 0x34,
1847  	};
1848  	const size_t n = sizeof(tbl);
1849  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1850  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1851  }
1852  
1853  CYBOZU_TEST_AUTO(encodekey)
1854  {
1855  	struct Code : Xbyak::CodeGenerator {
1856  		Code()
1857  		{
1858  			encodekey128(eax, ebx);
1859  			encodekey128(eax, r8d);
1860  			encodekey128(r8d, ebx);
1861  			encodekey128(r30d, r29d);
1862  
1863  			encodekey256(eax, ebx);
1864  			encodekey256(eax, r8d);
1865  			encodekey256(r8d, ebx);
1866  			encodekey256(r30d, r29d);
1867  		}
1868  	} c;
1869  	const uint8_t tbl[] = {
1870  		// encodekey128
1871  		0xf3, 0x0f, 0x38, 0xfa, 0xc3,
1872  		0x62, 0xd4, 0x7e, 0x08, 0xda, 0xc0,
1873  		0x62, 0x74, 0x7e, 0x08, 0xda, 0xc3,
1874  		0x62, 0x4c, 0x7e, 0x08, 0xda, 0xf5,
1875  		// encodekey256
1876  		0xf3, 0x0f, 0x38, 0xfb, 0xc3,
1877  		0x62, 0xd4, 0x7e, 0x08, 0xdb, 0xc0,
1878  		0x62, 0x74, 0x7e, 0x08, 0xdb, 0xc3,
1879  		0x62, 0x4c, 0x7e, 0x08, 0xdb, 0xf5,
1880  	};
1881  	const size_t n = sizeof(tbl);
1882  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1883  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1884  }
1885  
1886  CYBOZU_TEST_AUTO(sha)
1887  {
1888  	struct Code : Xbyak::CodeGenerator {
1889  		Code()
1890  		{
1891  			sha1msg1(xmm15, ptr [r30+r29*8+0x12]);
1892  			sha1msg2(xmm15, ptr [r30+r29*8+0x12]);
1893  			sha1nexte(xmm15, ptr [r30+r29*8+0x12]);
1894  			sha256msg1(xmm15, ptr [r30+r29*8+0x12]);
1895  			sha256msg2(xmm15, ptr [r30+r29*8+0x12]);
1896  			sha256rnds2(xmm15, ptr [r30+r29*8+0x12]);
1897  			sha1rnds4(xmm15, ptr [r30+r29*8+0x12], 0x23);
1898  		}
1899  	} c;
1900  	const uint8_t tbl[] = {
1901  		0x62, 0x1c, 0x78, 0x08, 0xd9, 0x7c, 0xee, 0x12,
1902  		0x62, 0x1c, 0x78, 0x08, 0xda, 0x7c, 0xee, 0x12,
1903  		0x62, 0x1c, 0x78, 0x08, 0xd8, 0x7c, 0xee, 0x12,
1904  		0x62, 0x1c, 0x78, 0x08, 0xdc, 0x7c, 0xee, 0x12,
1905  		0x62, 0x1c, 0x78, 0x08, 0xdd, 0x7c, 0xee, 0x12,
1906  		0x62, 0x1c, 0x78, 0x08, 0xdb, 0x7c, 0xee, 0x12,
1907  		0x62, 0x1c, 0x78, 0x08, 0xd4, 0x7c, 0xee, 0x12, 0x23,
1908  	};
1909  	const size_t n = sizeof(tbl);
1910  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1911  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1912  }
1913  
1914  CYBOZU_TEST_AUTO(0x0f_rex2)
1915  {
1916  	struct Code : Xbyak::CodeGenerator {
1917  		Code()
1918  		{
1919  			addps(xmm3, ptr [r30+r20*4+0x4]);
1920  			movups(xmm5, ptr [r16]);
1921  			movq(r31, xmm5);
1922  			cvtsd2si(r20, ptr [r30]);
1923  			bsr(r20, r30);
1924  		}
1925  	} c;
1926  	const uint8_t tbl[] = {
1927  		0xd5, 0xb1, 0x58, 0x5c, 0xa6, 0x04,
1928  		0xd5, 0x90, 0x10, 0x28, 0x66,
1929  		0xd5, 0x99, 0x7e, 0xef, 0xf2,
1930  		0xd5, 0xd9, 0x2d, 0x26,
1931  		0xd5, 0xd9, 0xbd, 0xe6,
1932  
1933  	};
1934  	const size_t n = sizeof(tbl);
1935  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1936  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1937  }
1938  
1939  CYBOZU_TEST_AUTO(rao_int)
1940  {
1941  	struct Code : Xbyak::CodeGenerator {
1942  		Code()
1943  		{
1944  			aadd(ptr [r16+r31*1], r17d);
1945  			aadd(ptr [r16+r31*1], r17);
1946  			aand(ptr [r16+r31*1], r17d);
1947  			aand(ptr [r16+r31*1], r17);
1948  			aor(ptr [r16+r31*1], r17d);
1949  			aor(ptr [r16+r31*1], r17);
1950  			axor(ptr [r16+r31*1], r17d);
1951  			axor(ptr [r16+r31*1], r17);
1952  		}
1953  	} c;
1954  	const uint8_t tbl[] = {
1955  		0x62, 0xac, 0x78, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0xf8, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac,
1956  		0x79, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0xf9, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0x7b, 0x08,
1957  		0xfc, 0x0c, 0x38, 0x62, 0xac, 0xfb, 0x08, 0xfc, 0x0c, 0x38, 0x62, 0xac, 0x7a, 0x08, 0xfc, 0x0c,
1958  		0x38, 0x62, 0xac, 0xfa, 0x08, 0xfc, 0x0c, 0x38,
1959  	};
1960  	const size_t n = sizeof(tbl);
1961  	CYBOZU_TEST_EQUAL(c.getSize(), n);
1962  	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
1963  }
1964