// sf_test.cpp
1 #define XBYAK_NO_OP_NAMES 2 #include <xbyak/xbyak_util.h> 3 4 #ifdef XBYAK32 5 #error "this sample is for only 64-bit mode" 6 #endif 7 8 using namespace Xbyak::util; 9 10 struct Code : public Xbyak::CodeGenerator { 11 void gen1() 12 { 13 StackFrame sf(this, 1); 14 mov(rax, sf.p[0]); 15 } 16 void gen2() 17 { 18 StackFrame sf(this, 2); 19 lea(rax, ptr [sf.p[0] + sf.p[1]]); 20 } 21 void gen3() 22 { 23 StackFrame sf(this, 3); 24 mov(rax, sf.p[0]); 25 add(rax, sf.p[1]); 26 add(rax, sf.p[2]); 27 } 28 void gen4() 29 { 30 StackFrame sf(this, 4); 31 mov(rax, sf.p[0]); 32 add(rax, sf.p[1]); 33 add(rax, sf.p[2]); 34 add(rax, sf.p[3]); 35 } 36 37 void gen5() 38 { 39 StackFrame sf(this, 4, UseRCX); 40 xor_(rcx, rcx); 41 mov(rax, sf.p[0]); 42 add(rax, sf.p[1]); 43 add(rax, sf.p[2]); 44 add(rax, sf.p[3]); 45 } 46 47 void gen6() 48 { 49 StackFrame sf(this, 4, UseRCX | UseRDX); 50 xor_(rcx, rcx); 51 xor_(rdx, rdx); 52 mov(rax, sf.p[0]); 53 add(rax, sf.p[1]); 54 add(rax, sf.p[2]); 55 add(rax, sf.p[3]); 56 } 57 58 void gen7() 59 { 60 StackFrame sf(this, 3, UseRCX | UseRDX); 61 xor_(rcx, rcx); 62 xor_(rdx, rdx); 63 mov(rax, sf.p[0]); 64 add(rax, sf.p[1]); 65 add(rax, sf.p[2]); 66 } 67 68 void gen8() 69 { 70 StackFrame sf(this, 3, 3 | UseRCX | UseRDX); 71 xor_(rcx, rcx); 72 xor_(rdx, rdx); 73 mov(sf.t[0], 1); 74 mov(sf.t[1], 2); 75 mov(sf.t[2], 3); 76 mov(rax, sf.p[0]); 77 add(rax, sf.p[1]); 78 add(rax, sf.p[2]); 79 } 80 81 void gen9() 82 { 83 StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32); 84 xor_(rcx, rcx); 85 xor_(rdx, rdx); 86 mov(sf.t[0], 1); 87 mov(sf.t[1], 2); 88 mov(sf.t[2], 3); 89 mov(rax, sf.p[0]); 90 add(rax, sf.p[1]); 91 add(rax, sf.p[2]); 92 mov(ptr [rsp + 8 * 0], rax); 93 mov(ptr [rsp + 8 * 1], rax); 94 mov(ptr [rsp + 8 * 2], rax); 95 mov(ptr [rsp + 8 * 3], rax); 96 } 97 98 void gen10() 99 { 100 StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32); 101 xor_(rcx, rcx); 102 xor_(rdx, rdx); 103 for (int i = 0; i < 8; i++) { 104 mov(sf.t[i], i); 105 } 106 mov(rax, sf.p[0]); 107 
add(rax, sf.p[1]); 108 add(rax, sf.p[2]); 109 add(rax, sf.p[3]); 110 mov(ptr [rsp + 8 * 0], rax); 111 mov(ptr [rsp + 8 * 1], rax); 112 mov(ptr [rsp + 8 * 2], rax); 113 mov(ptr [rsp + 8 * 3], rax); 114 } 115 116 void gen11() 117 { 118 StackFrame sf(this, 0, UseRCX); 119 xor_(rcx, rcx); 120 mov(rax, 3); 121 } 122 123 void gen12() 124 { 125 StackFrame sf(this, 4, UseRDX); 126 xor_(rdx, rdx); 127 mov(rax, sf.p[0]); 128 add(rax, sf.p[1]); 129 add(rax, sf.p[2]); 130 add(rax, sf.p[3]); 131 } 132 133 /* 134 int64_t f(const int64_t a[13]) { return sum-of-a[]; } 135 */ 136 void gen13() 137 { 138 StackFrame sf(this, 1, 13); 139 for (int i = 0; i < 13; i++) { 140 mov(sf.t[i], ptr[sf.p[0] + i * 8]); 141 } 142 mov(rax, sf.t[0]); 143 for (int i = 1; i < 13; i++) { 144 add(rax, sf.t[i]); 145 } 146 } 147 /* 148 same as gen13 149 */ 150 void gen14() 151 { 152 StackFrame sf(this, 1, 11 | UseRCX | UseRDX); 153 Pack t = sf.t; 154 t.append(rcx); 155 t.append(rdx); 156 for (int i = 0; i < 13; i++) { 157 mov(t[i], ptr[sf.p[0] + i * 8]); 158 } 159 mov(rax, t[0]); 160 for (int i = 1; i < 13; i++) { 161 add(rax, t[i]); 162 } 163 } 164 /* 165 return (1 << 15) - 1; 166 */ 167 void gen15() 168 { 169 StackFrame sf(this, 0, 14, 8); 170 Pack t = sf.t; 171 t.append(rax); 172 for (int i = 0; i < 15; i++) { 173 mov(t[i], 1 << i); 174 } 175 mov(qword[rsp], 0); 176 for (int i = 0; i < 15; i++) { 177 add(ptr[rsp], t[i]); 178 } 179 mov(rax, ptr[rsp]); 180 } 181 }; 182 183 struct Code2 : Xbyak::CodeGenerator { 184 Code2() 185 : Xbyak::CodeGenerator(4096 * 32) 186 { 187 } 188 void gen(int pNum, int tNum, int stackSizeByte) 189 { 190 StackFrame sf(this, pNum, tNum, stackSizeByte); 191 if (tNum & UseRCX) xor_(rcx, rcx); 192 if (tNum & UseRDX) xor_(rdx, rdx); 193 for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) { 194 mov(sf.t[i], 5); 195 } 196 for (int i = 0; i < stackSizeByte; i++) { 197 mov(byte [rsp + i], 0); 198 } 199 mov(rax, 1); 200 for (int i = 0; i < pNum; i++) { 201 add(rax, sf.p[i]); 202 } 
203 } 204 void gen2(int pNum, int tNum, int stackSizeByte) 205 { 206 StackFrame sf(this, pNum, tNum, stackSizeByte); 207 mov(rax, rsp); 208 } 209 }; 210 211 212 static int errNum = 0; 213 void check(int x, int y) 214 { 215 if (x != y) { 216 printf("err x=%d, y=%d\n", x, y); 217 errNum++; 218 } 219 } 220 221 void verify(const uint8_t *f, int pNum) 222 { 223 switch (pNum) { 224 case 0: 225 check(1, reinterpret_cast<int (*)()>(f)()); 226 return; 227 case 1: 228 check(11, reinterpret_cast<int (*)(int)>(f)(10)); 229 return; 230 case 2: 231 check(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100)); 232 return; 233 case 3: 234 check(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000)); 235 return; 236 case 4: 237 check(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000)); 238 return; 239 default: 240 printf("ERR pNum=%d\n", pNum); 241 exit(1); 242 } 243 } 244 245 void testAll() 246 { 247 Code2 code; 248 for (int stackSize = 0; stackSize < 32; stackSize += 7) { 249 for (int pNum = 0; pNum < 4; pNum++) { 250 for (int mode = 0; mode < 4; mode++) { 251 int maxNum = 0; 252 int opt = 0; 253 if (mode == 0) { 254 maxNum = 10; 255 } else if (mode == 1) { 256 maxNum = 9; 257 opt = UseRCX; 258 } else if (mode == 2) { 259 maxNum = 9; 260 opt = UseRDX; 261 } else { 262 maxNum = 8; 263 opt = UseRCX | UseRDX; 264 } 265 for (int tNum = 0; tNum < maxNum; tNum++) { 266 // printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize); 267 const uint8_t *f = code.getCurr(); 268 code.gen(pNum, tNum | opt, stackSize); 269 verify(f, pNum); 270 /* 271 check rsp is 16-byte aligned if stackSize > 0 272 */ 273 if (stackSize > 0) { 274 Code2 c2; 275 c2.gen2(pNum, tNum | opt, stackSize); 276 uint64_t addr = c2.getCode<uint64_t (*)()>()(); 277 check(addr % 16, 0); 278 } 279 } 280 } 281 } 282 } 283 } 284 285 void testPartial() 286 { 287 Code code; 288 int (*f1)(int) = code.getCurr<int (*)(int)>(); 289 code.gen1(); 290 check(5, f1(5)); 291 292 int 
(*f2)(int, int) = code.getCurr<int (*)(int, int)>(); 293 code.gen2(); 294 check(9, f2(3, 6)); 295 296 int (*f3)(int, int, int) = code.getCurr<int (*)(int, int, int)>(); 297 code.gen3(); 298 check(14, f3(1, 4, 9)); 299 300 int (*f4)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>(); 301 code.gen4(); 302 check(30, f4(1, 4, 9, 16)); 303 304 int (*f5)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>(); 305 code.gen5(); 306 check(23, f5(2, 5, 7, 9)); 307 308 int (*f6)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>(); 309 code.gen6(); 310 check(18, f6(3, 4, 5, 6)); 311 312 int (*f7)(int, int, int) = code.getCurr<int (*)(int, int, int)>(); 313 code.gen7(); 314 check(12, f7(3, 4, 5)); 315 316 int (*f8)(int, int, int) = code.getCurr<int (*)(int, int, int)>(); 317 code.gen8(); 318 check(23, f8(5, 8, 10)); 319 320 int (*f9)(int, int, int) = code.getCurr<int (*)(int, int, int)>(); 321 code.gen9(); 322 check(60, f9(10, 20, 30)); 323 324 int (*f10)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>(); 325 code.gen10(); 326 check(100, f10(10, 20, 30, 40)); 327 328 int (*f11)() = code.getCurr<int (*)()>(); 329 code.gen11(); 330 check(3, f11()); 331 332 int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>(); 333 code.gen12(); 334 check(24, f12(3, 5, 7, 9)); 335 336 { 337 int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; 338 int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>(); 339 code.gen13(); 340 check(91, f13(tbl)); 341 342 int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>(); 343 code.gen14(); 344 check(91, f14(tbl)); 345 } 346 int (*f15)() = code.getCurr<int (*)()>(); 347 code.gen15(); 348 check((1 << 15) - 1, f15()); 349 } 350 351 void put(const Xbyak::util::Pack& p) 352 { 353 for (size_t i = 0, n = p.size(); i < n; i++) { 354 printf("%s ", p[i].toString()); 355 } 356 printf("\n"); 357 } 358 359 void verifyPack(const 
Xbyak::util::Pack& p, const int *tbl, size_t tblNum) 360 { 361 for (size_t i = 0; i < tblNum; i++) { 362 check(p[i].getIdx(), tbl[i]); 363 } 364 } 365 366 void testPack() 367 { 368 const int N = 10; 369 Xbyak::Reg64 regTbl[N]; 370 for (int i = 0; i < N; i++) { 371 regTbl[i] = Xbyak::Reg64(i); 372 } 373 Xbyak::util::Pack p(regTbl, N); 374 const struct { 375 int pos; 376 int num; 377 int tbl[10]; 378 } tbl[] = { 379 { 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } }, 380 { 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } }, 381 { 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } }, 382 { 3, 7, { 3, 4, 5, 6, 7, 8, 9 } }, 383 { 4, 6, { 4, 5, 6, 7, 8, 9 } }, 384 { 5, 5, { 5, 6, 7, 8, 9 } }, 385 { 6, 4, { 6, 7, 8, 9 } }, 386 { 7, 3, { 7, 8, 9 } }, 387 { 8, 2, { 8, 9 } }, 388 { 9, 1, { 9 } }, 389 { 3, 5, { 3, 4, 5, 6, 7 } }, 390 }; 391 for (size_t i = 0; i < sizeof(tbl) / sizeof(*tbl); i++) { 392 const int pos = tbl[i].pos; 393 const int num = tbl[i].num; 394 verifyPack(p.sub(pos, num), tbl[i].tbl, num); 395 if (pos + num == N) { 396 verifyPack(p.sub(pos), tbl[i].tbl, num); 397 } 398 } 399 } 400 401 int main() 402 try 403 { 404 testAll(); 405 406 testPartial(); 407 testPack(); 408 printf("errNum=%d\n", errNum); 409 } catch (std::exception& e) { 410 printf("err %s\n", e.what()); 411 return 1; 412 } catch (...) { 413 puts("ERR"); 414 return 1; 415 } 416