// test-grad0.cpp - gradient checks for ggml operators (finite differences vs backward graph)
1 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows 2 #include "ggml.h" 3 4 #include <cmath> 5 #include <cstdio> 6 #include <cstdlib> 7 #include <cassert> 8 9 #if defined(_MSC_VER) 10 #pragma warning(disable: 4244 4267) // possible loss of data 11 #endif 12 13 #if defined(__GNUC__) 14 #pragma GCC diagnostic ignored "-Wdouble-promotion" 15 #endif 16 17 #define MAX_NARGS 3 18 19 #undef MIN 20 #undef MAX 21 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 22 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 23 24 #define GGML_SILU_FP16 25 26 // 27 // logging 28 // 29 30 #if (GGML_DEBUG >= 1) 31 #define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__) 32 #else 33 #define GGML_PRINT_DEBUG(...) 34 #endif 35 36 #if (GGML_DEBUG >= 5) 37 #define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__) 38 #else 39 #define GGML_PRINT_DEBUG_5(...) 40 #endif 41 42 #if (GGML_DEBUG >= 10) 43 #define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__) 44 #else 45 #define GGML_PRINT_DEBUG_10(...) 46 #endif 47 48 #define GGML_PRINT(...) 
printf(__VA_ARGS__) 49 50 static float frand(void) { 51 return (float)rand()/(float)RAND_MAX; 52 } 53 54 static int irand(int n) { 55 if (n == 0) return 0; 56 return rand()%n; 57 } 58 59 static void get_random_dims(int64_t * dims, int ndims) { 60 dims[0] = dims[1] = dims[2] = dims[3] = 1; 61 62 for (int i = 0; i < ndims; i++) { 63 dims[i] = 1 + irand(4); 64 } 65 } 66 67 static struct ggml_tensor * get_random_tensor_f32( 68 struct ggml_context * ctx0, 69 int ndims, 70 int64_t ne[], 71 float fmin, 72 float fmax) { 73 struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne); 74 75 switch (ndims) { 76 case 1: 77 for (int i0 = 0; i0 < ne[0]; i0++) { 78 ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin; 79 } 80 break; 81 case 2: 82 for (int i1 = 0; i1 < ne[1]; i1++) { 83 for (int i0 = 0; i0 < ne[0]; i0++) { 84 ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; 85 } 86 } 87 break; 88 case 3: 89 for (int i2 = 0; i2 < ne[2]; i2++) { 90 for (int i1 = 0; i1 < ne[1]; i1++) { 91 for (int i0 = 0; i0 < ne[0]; i0++) { 92 ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; 93 } 94 } 95 } 96 break; 97 case 4: 98 for (int i3 = 0; i3 < ne[3]; i3++) { 99 for (int i2 = 0; i2 < ne[2]; i2++) { 100 for (int i1 = 0; i1 < ne[1]; i1++) { 101 for (int i0 = 0; i0 < ne[0]; i0++) { 102 ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; 103 } 104 } 105 } 106 } 107 break; 108 default: 109 assert(false); 110 } 111 112 return result; 113 } 114 115 static struct ggml_tensor * get_random_tensor_f16( 116 struct ggml_context * ctx0, 117 int ndims, 118 int64_t ne[], 119 float fmin, 120 float fmax) { 121 struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne); 122 123 switch (ndims) { 124 case 1: 125 for (int i0 = 0; i0 < ne[0]; i0++) { 126 ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); 127 } 128 break; 
129 case 2: 130 for (int i1 = 0; i1 < ne[1]; i1++) { 131 for (int i0 = 0; i0 < ne[0]; i0++) { 132 ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); 133 } 134 } 135 break; 136 case 3: 137 for (int i2 = 0; i2 < ne[2]; i2++) { 138 for (int i1 = 0; i1 < ne[1]; i1++) { 139 for (int i0 = 0; i0 < ne[0]; i0++) { 140 ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); 141 } 142 } 143 } 144 break; 145 case 4: 146 for (int i3 = 0; i3 < ne[3]; i3++) { 147 for (int i2 = 0; i2 < ne[2]; i2++) { 148 for (int i1 = 0; i1 < ne[1]; i1++) { 149 for (int i0 = 0; i0 < ne[0]; i0++) { 150 ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); 151 } 152 } 153 } 154 } 155 break; 156 default: 157 assert(false); 158 } 159 160 return result; 161 } 162 163 static struct ggml_tensor * get_random_tensor_i32( 164 struct ggml_context * ctx0, 165 int ndims, 166 int64_t ne[], 167 int32_t imin, 168 int32_t imax) { 169 struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne); 170 171 switch (ndims) { 172 case 1: 173 for (int i0 = 0; i0 < ne[0]; i0++) { 174 ((int32_t *)result->data)[i0] = irand(imax - imin) + imin; 175 } 176 break; 177 case 2: 178 for (int i1 = 0; i1 < ne[1]; i1++) { 179 for (int i0 = 0; i0 < ne[0]; i0++) { 180 ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin; 181 } 182 } 183 break; 184 case 3: 185 for (int i2 = 0; i2 < ne[2]; i2++) { 186 for (int i1 = 0; i1 < ne[1]; i1++) { 187 for (int i0 = 0; i0 < ne[0]; i0++) { 188 ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; 189 } 190 } 191 } 192 break; 193 case 4: 194 for (int i3 = 0; i3 < ne[3]; i3++) { 195 for (int i2 = 0; i2 < ne[2]; i2++) { 196 for (int i1 = 0; i1 < ne[1]; i1++) { 197 for (int i0 = 0; i0 < ne[0]; i0++) { 198 ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + 
i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; 199 } 200 } 201 } 202 } 203 break; 204 default: 205 assert(false); 206 } 207 208 return result; 209 } 210 211 static bool check_gradient( 212 const char * op_name, 213 struct ggml_context * ctx0, 214 struct ggml_tensor * x[], 215 struct ggml_tensor * f, 216 int ndims, 217 int nargs, 218 float eps, 219 float max_error_abs, 220 float max_error_rel) { 221 222 static int n_threads = -1; 223 if (n_threads < 0) { 224 n_threads = GGML_DEFAULT_N_THREADS; 225 226 const char *env = getenv("GGML_N_THREADS"); 227 if (env) { 228 n_threads = atoi(env); 229 } 230 231 printf("GGML_N_THREADS = %d\n", n_threads); 232 } 233 234 struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true); 235 struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true); 236 ggml_build_forward_expand(gf, f); 237 ggml_graph_cpy(gf, gb); 238 ggml_build_backward_expand(ctx0, gf, gb, false); 239 240 ggml_graph_compute_with_ctx(ctx0, gf, n_threads); 241 242 ggml_graph_reset (gf); 243 ggml_set_f32 (f->grad, 1.0f); 244 245 ggml_graph_compute_with_ctx(ctx0, gb, n_threads); 246 247 // ggml_graph_dump_dot(gf, NULL, "test-grad0-forward.dot"); 248 // ggml_graph_dump_dot(gb, gf, "test-grad0-backward.dot"); 249 250 for (int i = 0; i < nargs; ++i) { 251 const int nelements = ggml_nelements(x[i]); 252 for (int k = 0; k < nelements; ++k) { 253 // compute gradient using finite differences 254 const float x0 = ggml_get_f32_1d(x[i], k); 255 const float xm = x0 - eps; 256 const float xp = x0 + eps; 257 ggml_set_f32_1d(x[i], k, xp); 258 259 ggml_graph_compute_with_ctx(ctx0, gf, n_threads); 260 261 const double f0 = ggml_get_f32_1d(f, 0); 262 263 ggml_set_f32_1d(x[i], k, xm); 264 265 ggml_graph_compute_with_ctx(ctx0, gf, n_threads); 266 267 const double f1 = ggml_get_f32_1d(f, 0); 268 const double g0 = (f0 - f1)/(2.0*(double) eps); 269 270 ggml_set_f32_1d(x[i], k, x0); 271 272 // compute gradient using backward graph 
273 ggml_graph_reset (gf); 274 ggml_set_f32 (f->grad, 1.0f); 275 276 ggml_graph_compute_with_ctx(ctx0, gb, n_threads); 277 278 const double g1 = ggml_get_f32_1d(x[i]->grad, k); 279 280 const double error_abs = fabs(g0 - g1); 281 const double error_rel = g0 != 0 ? fabs(g0 - g1)/fabs(g0) : 0; 282 283 if (error_abs > max_error_abs || error_rel > max_error_rel) { 284 printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n", 285 op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel); 286 //assert(false); 287 return false; 288 } 289 } 290 } 291 292 return true; 293 } 294 295 // TODO: clean-up this .. 296 static bool check_mat_mul( 297 const struct ggml_tensor * y, 298 const struct ggml_tensor * x0, 299 const struct ggml_tensor * x1) { 300 float * dst = (float *) y->data; 301 float * src0 = (float *) x0->data; 302 float * src1 = (float *) x1->data; 303 304 const int nc = x0->ne[1]; 305 const int nr = x1->ne[1]; 306 const int nk = x0->ne[0]; 307 308 GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk); 309 310 GGML_PRINT_DEBUG("x0:\n"); 311 for (int j = 0; j < x0->ne[1]; ++j) { 312 for (int i = 0; i < x0->ne[0]; ++i) { 313 GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]); 314 } 315 GGML_PRINT_DEBUG("\n"); 316 } 317 GGML_PRINT_DEBUG("\n"); 318 319 GGML_PRINT_DEBUG("x1:\n"); 320 for (int j = 0; j < x1->ne[1]; ++j) { 321 for (int i = 0; i < x1->ne[0]; ++i) { 322 GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]); 323 } 324 GGML_PRINT_DEBUG("\n"); 325 } 326 GGML_PRINT_DEBUG("\n"); 327 328 GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]); 329 for (int j = 0; j < y->ne[1]; ++j) { 330 for (int i = 0; i < y->ne[0]; ++i) { 331 GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]); 332 } 333 GGML_PRINT_DEBUG("\n"); 334 } 335 336 for (int i = 0; i < nr; ++i) { 337 for (int j = 0; j < nc; ++j) { 338 float sum = 0.0f; 339 340 for (int k = 0; k < nk; ++k) { 341 sum += src0[j*nk + 
k]*src1[i*nk + k]; 342 } 343 344 if (fabsf(dst[i*nc + j] - sum) > 1e-5f) { 345 fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum); 346 assert(false); 347 return false; 348 } 349 } 350 } 351 352 return true; 353 } 354 355 #define NUM_PERMUTATIONS (4*3*2*1) 356 357 int main(int argc, const char ** argv) { 358 struct ggml_init_params params = { 359 /* .mem_size = */ 256*1024*1024, 360 /* .mem_buffer = */ NULL, 361 /* .no_alloc = */ false, 362 }; 363 364 int64_t ne[4]; 365 366 int all_permutations[4 * NUM_PERMUTATIONS]; 367 { 368 int count = 0; 369 for (int ax0=0; ax0<4; ++ax0) { 370 for (int ax1=0; ax1<4; ++ax1) { 371 if (ax1 == ax0) continue; 372 for (int ax2=0; ax2<4; ++ax2) { 373 if (ax2 == ax0) continue; 374 if (ax2 == ax1) continue; 375 for (int ax3=0; ax3<4; ++ax3) { 376 if (ax3 == ax0) continue; 377 if (ax3 == ax1) continue; 378 if (ax3 == ax2) continue; 379 assert(count < NUM_PERMUTATIONS); 380 all_permutations[count*4+0] = ax0; 381 all_permutations[count*4+1] = ax1; 382 all_permutations[count*4+2] = ax2; 383 all_permutations[count*4+3] = ax3; 384 ++count; 385 } 386 } 387 } 388 } 389 } 390 391 unsigned seed_iter = 1; 392 393 // original loop: 1000 394 int niter = 4; 395 const char *env = getenv("GGML_NLOOP"); 396 if (env != NULL) { 397 niter = atoi(env); 398 } 399 if (argc > 1) { 400 niter = atoi(argv[1]); 401 } 402 for (int iter = 0; iter < niter; ++iter) { 403 srand(seed_iter); 404 seed_iter = rand(); 405 unsigned seed = rand(); 406 407 printf("test-grad0: iter:%d/%d\n", iter, niter); 408 struct ggml_context * ctx0 = ggml_init(params); 409 410 get_random_dims(ne, 4); 411 412 struct ggml_tensor * x[MAX_NARGS]; 413 414 // add f32 415 { 416 srand(seed); 417 const int nargs = 2; 418 419 for (int ndims = 1; ndims <= 4; ++ndims) { 420 for (int i = 0; i < nargs; ++i) { 421 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 422 ggml_set_param(ctx0, x[i]); 423 } 424 425 struct ggml_tensor * f = ggml_sum(ctx0, 
ggml_add(ctx0, x[0], x[1])); 426 427 check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); 428 } 429 } 430 431 // add f16 432 { 433 srand(seed); 434 const int nargs = 2; 435 436 for (int ndims = 1; ndims <= 4; ++ndims) { 437 for (int i = 0; i < nargs; ++i) { 438 x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); 439 ggml_set_param(ctx0, x[i]); 440 } 441 442 struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); 443 444 check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f); 445 } 446 } 447 448 // sub 449 { 450 srand(seed); 451 const int nargs = 2; 452 453 for (int ndims = 1; ndims <= 4; ++ndims) { 454 for (int i = 0; i < nargs; ++i) { 455 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 456 ggml_set_param(ctx0, x[i]); 457 } 458 459 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1])); 460 461 check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 462 } 463 } 464 465 // mul 466 { 467 srand(seed); 468 const int nargs = 2; 469 470 for (int ndims = 1; ndims <= 4; ++ndims) { 471 for (int i = 0; i < nargs; ++i) { 472 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 473 ggml_set_param(ctx0, x[i]); 474 } 475 476 struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1])); 477 478 check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 479 } 480 } 481 482 // div 483 { 484 srand(seed); 485 const int nargs = 2; 486 487 for (int ndims = 1; ndims <= 4; ++ndims) { 488 for (int i = 0; i < nargs; ++i) { 489 x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f); 490 ggml_set_param(ctx0, x[i]); 491 } 492 493 struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1])); 494 495 check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f); 496 } 497 } 498 499 // sqr 500 { 501 srand(seed); 502 const int nargs = 1; 503 504 for (int ndims = 1; ndims <= 2; ++ndims) { 505 for (int i = 0; i < nargs; ++i) { 506 x[i] = 
get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 507 ggml_set_param(ctx0, x[i]); 508 } 509 510 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0])); 511 512 check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 513 } 514 } 515 516 // sqrt 517 { 518 srand(seed); 519 const int nargs = 1; 520 521 for (int ndims = 1; ndims <= 2; ++ndims) { 522 for (int i = 0; i < nargs; ++i) { 523 x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); 524 ggml_set_param(ctx0, x[i]); 525 } 526 527 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0])); 528 529 check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f); 530 } 531 } 532 533 // log 534 { 535 srand(seed); 536 const int nargs = 1; 537 538 for (int ndims = 1; ndims <= 2; ++ndims) { 539 for (int i = 0; i < nargs; ++i) { 540 x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); 541 ggml_set_param(ctx0, x[i]); 542 } 543 544 struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0])); 545 546 check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f); 547 } 548 } 549 550 // sum 551 { 552 srand(seed); 553 const int nargs = 1; 554 555 for (int ndims = 1; ndims <= 2; ++ndims) { 556 for (int i = 0; i < nargs; ++i) { 557 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 558 ggml_set_param(ctx0, x[i]); 559 } 560 561 struct ggml_tensor * f = ggml_sum(ctx0, x[0]); 562 563 check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 564 } 565 } 566 567 568 // sum_rows 569 { 570 srand(seed); 571 const int nargs = 1; 572 573 for (int ndims = 1; ndims <= 4; ++ndims) { 574 for (int i = 0; i < nargs; ++i) { 575 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 576 ggml_set_param(ctx0, x[i]); 577 } 578 579 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0]))); 580 581 check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); 582 } 583 } 584 585 // mean, not yet fully implemented 586 
if(0) 587 { 588 srand(seed); 589 const int nargs = 1; 590 591 for (int ndims = 1; ndims <= 4; ++ndims) { 592 for (int i = 0; i < nargs; ++i) { 593 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 594 ggml_set_param(ctx0, x[i]); 595 } 596 597 struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0])); 598 599 check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 600 } 601 } 602 603 // argmax 604 if (0) 605 { 606 srand(seed); 607 const int nargs = 1; 608 609 for (int ndims = 1; ndims <= 4; ++ndims) { 610 for (int i = 0; i < nargs; ++i) { 611 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 612 ggml_set_param(ctx0, x[i]); 613 } 614 615 struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0])); 616 617 check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 618 } 619 } 620 621 // repeat 622 { 623 srand(seed); 624 int64_t ne2[4]; 625 get_random_dims(ne2, 4); 626 627 ne2[0] = ne[0] * ne2[0]; 628 ne2[1] = ne[1] * ne2[1]; 629 ne2[2] = 1; 630 ne2[3] = 1; 631 632 const int nargs = 1; 633 for (int ndims = 1; ndims <= 2; ++ndims) { 634 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 635 x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); 636 ggml_set_param(ctx0, x[0]); 637 638 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); 639 640 check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); 641 } 642 } 643 644 // repeat back 645 { 646 srand(seed); 647 int64_t ne2[4]; 648 get_random_dims(ne2, 4); 649 650 ne2[0] = ne[0] * ne2[0]; 651 ne2[1] = ne[1] * ne2[1]; 652 ne2[2] = 1; 653 ne2[3] = 1; 654 655 const int nargs = 1; 656 for (int ndims = 1; ndims <= 2; ++ndims) { 657 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 658 x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); 659 ggml_set_param(ctx0, x[0]); 660 661 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], 
ggml_repeat_back(ctx0, x[1], x[0])))); 662 663 check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); 664 } 665 } 666 667 // abs (finite differences do not work) 668 //{ 669 // const int nargs = 1; 670 671 // for (int ndims = 1; ndims <= 2; ++ndims) { 672 // for (int i = 0; i < nargs; ++i) { 673 // x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 674 // ggml_set_param(ctx0, x[i]); 675 // } 676 677 // struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0])); 678 679 // check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f); 680 // } 681 //} 682 683 // sgn 684 { 685 srand(seed); 686 const int nargs = 1; 687 688 for (int ndims = 1; ndims <= 4; ++ndims) { 689 for (int i = 0; i < nargs; ++i) { 690 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 691 ggml_set_param(ctx0, x[i]); 692 } 693 694 struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0])); 695 696 check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 697 } 698 } 699 700 // neg 701 { 702 srand(seed); 703 const int nargs = 1; 704 705 for (int ndims = 1; ndims <= 4; ++ndims) { 706 for (int i = 0; i < nargs; ++i) { 707 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 708 ggml_set_param(ctx0, x[i]); 709 } 710 711 struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0])); 712 713 check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 714 } 715 } 716 717 // step 718 { 719 srand(seed); 720 const int nargs = 1; 721 722 for (int ndims = 1; ndims <= 4; ++ndims) { 723 for (int i = 0; i < nargs; ++i) { 724 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 725 ggml_set_param(ctx0, x[i]); 726 } 727 728 struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0])); 729 730 check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 731 } 732 } 733 734 // tanh, not yet fully implemented 735 if(0) 736 { 737 srand(seed); 738 const int nargs = 1; 739 740 for (int ndims = 1; ndims <= 4; ++ndims) 
{ 741 for (int i = 0; i < nargs; ++i) { 742 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 743 ggml_set_param(ctx0, x[i]); 744 } 745 746 struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0])); 747 748 check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 749 } 750 } 751 752 // mul_mat 753 { 754 srand(seed); 755 const int nargs = 2; 756 757 for (int ndims = 2; ndims <= 4; ++ndims) { 758 int max_nrep = (ndims >= 3) ? 2 : 1; 759 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 760 for (int nrep2 = 1; nrep2 < max_nrep; ++nrep2) { 761 for (int nrep3 = 1; nrep3 < max_nrep; ++nrep3) { 762 { 763 int64_t ne2[4]; 764 get_random_dims(ne2, 4); 765 ne2[0] = ne[0]; 766 ne2[2] = nrep2 * ne[2]; 767 ne2[3] = nrep3 * ne[3]; 768 x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); 769 } 770 771 ggml_set_param(ctx0, x[0]); 772 ggml_set_param(ctx0, x[1]); 773 774 struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]); 775 struct ggml_tensor * f = ggml_sum(ctx0, m); 776 777 GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims); 778 779 check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 780 if (ndims == 2) { 781 // check_mat_mul does not support ndims > 2 782 check_mat_mul(m, x[1], x[0]); 783 } 784 } 785 } 786 } 787 } 788 789 // elu, not yet fully implemented 790 if(0) 791 { 792 srand(seed); 793 const int nargs = 1; 794 795 for (int ndims = 1; ndims <= 4; ++ndims) { 796 for (int i = 0; i < nargs; ++i) { 797 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 798 ggml_set_param(ctx0, x[i]); 799 } 800 801 struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0])); 802 803 check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 804 } 805 } 806 807 // relu 808 { 809 srand(seed); 810 const int nargs = 1; 811 812 for (int ndims = 1; ndims <= 4; ++ndims) { 813 for (int i = 0; i < nargs; 
++i) { 814 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 815 ggml_set_param(ctx0, x[i]); 816 } 817 818 struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0])); 819 820 check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 821 } 822 } 823 824 // gelu, not yet fully implemented 825 if(0) 826 { 827 srand(seed); 828 const int nargs = 1; 829 830 for (int ndims = 1; ndims <= 4; ++ndims) { 831 for (int i = 0; i < nargs; ++i) { 832 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 833 ggml_set_param(ctx0, x[i]); 834 } 835 836 struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0])); 837 838 check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); 839 } 840 } 841 842 // silu 843 { 844 srand(seed); 845 const int nargs = 1; 846 847 for (int ndims = 1; ndims <= 2; ++ndims) { 848 for (int i = 0; i < nargs; ++i) { 849 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 850 ggml_set_param(ctx0, x[i]); 851 } 852 853 struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0])); 854 855 #ifdef GGML_SILU_FP16 856 // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds. 
857 check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY); 858 #else 859 check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 860 #endif 861 } 862 } 863 864 // rms_norm 865 { 866 srand(seed); 867 const int nargs = 1; 868 869 for (int ndims = 1; ndims <= 2; ++ndims) { 870 for (int i = 0; i < nargs; ++i) { 871 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 872 ggml_set_param(ctx0, x[i]); 873 } 874 875 struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f)); 876 877 check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY); 878 } 879 } 880 881 // scale 882 { 883 srand(seed); 884 const int nargs = 1; 885 886 for (int ndims = 1; ndims <= 2; ++ndims) { 887 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 888 889 const float s = -1.0f + 2.0f*frand(); 890 891 ggml_set_param(ctx0, x[0]); 892 893 struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], s)); 894 895 check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 896 } 897 } 898 899 // cpy f32 900 { 901 srand(seed); 902 const int nargs = 2; 903 904 for (int ndims = 1; ndims <= 2; ++ndims) { 905 for (int i = 0; i < nargs; ++i) { 906 x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 907 ggml_set_param(ctx0, x[i]); 908 } 909 // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] 910 911 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); 912 913 check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 914 } 915 } 916 917 // cpy f16 918 { 919 srand(seed); 920 const int nargs = 2; 921 922 for (int ndims = 1; ndims <= 2; ++ndims) { 923 for (int i = 0; i < nargs; ++i) { 924 x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); 925 ggml_set_param(ctx0, x[i]); 926 } 927 // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] 928 929 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); 930 931 
check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); 932 } 933 } 934 935 // reshape (1d->nd) 936 { 937 srand(seed); 938 const int nargs = 1; 939 940 for (int ndims = 1; ndims <= 2; ++ndims) { 941 int64_t ne2[4]; 942 ne2[0] = 1; 943 ne2[1] = 1; 944 ne2[2] = 1; 945 ne2[3] = 1; 946 for (int i = 0; i < ndims; ++i) { 947 ne2[0] *= ne[i]; 948 } 949 x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); 950 x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 951 ggml_set_param(ctx0, x[0]); 952 953 954 struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); 955 check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 956 } 957 } 958 959 // reshape (nd->1d) 960 { 961 srand(seed); 962 const int nargs = 1; 963 964 for (int ndims = 1; ndims <= 2; ++ndims) { 965 int64_t ne2[4]; 966 ne2[0] = 1; 967 ne2[1] = 1; 968 ne2[2] = 1; 969 ne2[3] = 1; 970 for (int i = 0; i < ndims; ++i) { 971 ne2[0] *= ne[i]; 972 } 973 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 974 x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); 975 ggml_set_param(ctx0, x[0]); 976 977 978 struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); 979 check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 980 } 981 } 982 983 // acc 1d 984 { 985 srand(seed); 986 int64_t ne2[4] = { 1, 1, 1, 1 }; 987 988 const int nargs = 2; 989 for (int ndims = 1; ndims <= 4; ++ndims) { 990 991 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 992 ggml_set_param(ctx0, x[0]); 993 994 get_random_dims(ne2, 1); 995 while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) { 996 get_random_dims(ne2, 1); 997 } 998 999 x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); 1000 ggml_set_param(ctx0, x[1]); 1001 1002 const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); 1003 const int offset = irand(max_offset) * ggml_element_size(x[0]); 1004 1005 struct ggml_tensor * f = 
ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); 1006 1007 check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 1008 } 1009 } 1010 1011 // acc 2d 1012 { 1013 srand(seed); 1014 int64_t ne2[4] = { 1, 1, 1, 1 }; 1015 int64_t max_offsets[4] = { 0, 0, 0, 0 }; 1016 int64_t offsets[4] = { 0, 0, 0, 0 }; 1017 1018 const int nargs = 2; 1019 for (int ndims = 2; ndims <= 4; ++ndims) { 1020 1021 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 1022 ggml_set_param(ctx0, x[0]); 1023 1024 get_random_dims(ne2, 2); 1025 while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) { 1026 get_random_dims(ne2, 2); 1027 } 1028 1029 x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); 1030 ggml_set_param(ctx0, x[1]); 1031 1032 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); 1033 max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); 1034 offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; 1035 offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; 1036 const int offset = offsets[0] + offsets[1]; 1037 1038 struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); 1039 1040 check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 1041 } 1042 } 1043 1044 // acc 3d 1045 { 1046 srand(seed); 1047 int64_t ne2[4] = { 1, 1, 1, 1 }; 1048 int64_t max_offsets[4] = { 0, 0, 0, 0 }; 1049 int64_t offsets[4] = { 0, 0, 0, 0 }; 1050 1051 const int nargs = 2; 1052 for (int ndims = 3; ndims <= 4; ++ndims) { 1053 1054 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 1055 ggml_set_param(ctx0, x[0]); 1056 1057 get_random_dims(ne2, 3); 1058 while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) { 1059 get_random_dims(ne2, 3); 1060 } 1061 1062 x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f); 1063 ggml_set_param(ctx0, x[1]); 1064 1065 max_offsets[0] = MAX(0, 
x[0]->ne[0] - x[1]->ne[0]); 1066 max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); 1067 max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]); 1068 offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; 1069 offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; 1070 offsets[2] = irand(max_offsets[2]) * x[0]->nb[2]; 1071 const int offset = offsets[0] + offsets[1] + offsets[2]; 1072 1073 struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); 1074 1075 check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 1076 } 1077 } 1078 1079 // acc 4d 1080 { 1081 srand(seed); 1082 int64_t ne2[4] = { 1, 1, 1, 1 }; 1083 int64_t max_offsets[4] = { 0, 0, 0, 0 }; 1084 int64_t offsets[4] = { 0, 0, 0, 0 }; 1085 1086 const int nargs = 2; 1087 for (int ndims = 4; ndims <= 4; ++ndims) { 1088 1089 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 1090 ggml_set_param(ctx0, x[0]); 1091 1092 get_random_dims(ne2, 4); 1093 while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) { 1094 get_random_dims(ne2, 4); 1095 } 1096 1097 x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); 1098 ggml_set_param(ctx0, x[1]); 1099 1100 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); 1101 max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); 1102 max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]); 1103 max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]); 1104 offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; 1105 offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; 1106 offsets[2] = irand(max_offsets[2]) * x[0]->nb[2]; 1107 offsets[3] = irand(max_offsets[3]) * x[0]->nb[3]; 1108 const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3]; 1109 1110 struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); 1111 1112 check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 
INFINITY); 1113 } 1114 } 1115 1116 // set_1d 1117 { 1118 srand(seed); 1119 int64_t ne2[4]; 1120 1121 const int nargs = 2; 1122 for (int ndims = 1; ndims <= 4; ++ndims) { 1123 1124 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 1125 ggml_set_param(ctx0, x[0]); 1126 1127 get_random_dims(ne2, 1); 1128 while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) { 1129 get_random_dims(ne2, 1); 1130 } 1131 1132 x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); 1133 ggml_set_param(ctx0, x[1]); 1134 1135 const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); 1136 const int offset = irand(max_offset) * ggml_element_size(x[0]); 1137 1138 struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset)); 1139 1140 check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 1141 } 1142 } 1143 1144 // set_2d 1145 { 1146 srand(seed); 1147 int64_t ne2[4]; 1148 int64_t max_offsets[4] = { 0, 0, 0, 0 }; 1149 int64_t offsets[4] = { 0, 0, 0, 0 }; 1150 1151 const int nargs = 1; 1152 for (int ndims = 2; ndims <= 4; ++ndims) { 1153 1154 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); 1155 ggml_set_param(ctx0, x[0]); 1156 1157 get_random_dims(ne2, 2); 1158 while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) { 1159 get_random_dims(ne2, 2); 1160 } 1161 1162 x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); 1163 ggml_set_param(ctx0, x[1]); 1164 1165 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); 1166 max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); 1167 offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; 1168 offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; 1169 const int offset = offsets[0] + offsets[1]; 1170 1171 struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset)); 1172 1173 check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); 1174 } 1175 } 1176 1177 // view_1d 1178 { 1179 srand(seed); 1180 const 
int nargs = 1; // (fragment) completes the "const int nargs" declaration begun above
            for (int ndims = 1; ndims <= 4; ++ndims) {

                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                // random sub-range [i0, i1) of the flattened elements
                // NOTE(review): nelem can be 0 when k0 == k1 -- presumably ggml_view_1d accepts that; confirm
                const int k0 = irand(ggml_nelements(x[0]));
                const int k1 = irand(ggml_nelements(x[0]));
                const int i0 = MIN(k0, k1);
                const int i1 = MAX(k0, k1);

                const int offset = i0 * sizeof(float);
                const int nelem = i1 - i0;

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));

                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_2d: sum over a random dense 2d view of x[0]
        {
            srand(seed);
            int64_t ne2[4];
            int64_t nb2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {

                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);

                // re-roll the view shape until it fits into x[0]
                get_random_dims(ne2, 2);
                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 2);
                }
                const int count = ne2[0]*ne2[1];

                // strides for a dense f32 view
                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];

                ggml_set_param(ctx0, x[0]);

                // offset chosen so the whole view stays inside x[0]
                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));

                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_3d: sum over a random dense 3d view of x[0]
        {
            srand(seed);
            int64_t ne2[4] = {1,1,1,1};
            int64_t nb2[4] = {0,0,0,0};

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {

                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);

                // re-roll the view shape until it fits into x[0]
                get_random_dims(ne2, 3);
                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 3);
                }
                const int count = ne2[0]*ne2[1]*ne2[2];

                // strides for a dense f32 view
                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];
                nb2[2] = nb2[1]*ne2[1];

ggml_set_param(ctx0, x[0]); // (continued) remainder of the "view_3d" test begun above

                // offset chosen so the whole view stays inside x[0]
                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));

                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // permute: apply a random axis permutation and check gradients
        {
            srand(seed);
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_permute will set axes of dimensions below n_dims to 1.
                // to make ggml_permute work correctly on all axes,
                // the input tensor needs maximal n_dim of 4.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                // pick one of the precomputed axis permutations
                const int p = irand(NUM_PERMUTATIONS);
                const int ax0 = all_permutations[p*4+0];
                const int ax1 = all_permutations[p*4+1];
                const int ax2 = all_permutations[p*4+2];
                const int ax3 = all_permutations[p*4+3];

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));

                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // transpose
        {
            srand(seed);
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_transpose will set axes of dimensions below n_dims to 1.
                // to make ggml_transpose work correctly on all axes,
                // the input tensor needs maximal n_dim of 4.
for (int i=0; i<ndims; ++i) { // (continued) remainder of the "transpose" test begun above
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));

                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // get_rows: gather rows of x[0] selected by the indices in x[1]
        {
            srand(seed);
            int64_t ne2[4] = {ne[0], ne[1], 1, 1};
            int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
            // x[1] holds random row indices, presumably in [0, ne2[1]) -- it is not a differentiable param
            x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);

            ggml_set_param(ctx0, x[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));

            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_inf
        {
            srand(seed);
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));

            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_zero: same shape/params as diag_mask_inf above, different masking op
        {
            srand(seed);
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));

            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // softmax
        {
            srand(seed);
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2,
4); // (fragment) completes the get_random_dims(ne2, 4) call begun above

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                float eps = 1e-6f;
                // don't use only sum as aggregation, because sum of softmax is always 1 -> finite differences should not work
                // instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
                struct ggml_tensor * f = ggml_sum(ctx0,
                                            ggml_log(ctx0,
                                                ggml_add1(ctx0,
                                                    ggml_scale(ctx0,
                                                        ggml_soft_max(ctx0, x[0]),
                                                        1.0f - eps),
                                                    ggml_new_f32(ctx0, eps))));

                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY);
                // NOTE: softmax forward is computed using f16 table lookup instead of using actual expf, but backward assumes actual expf.
                // this may result in gradients that differ from the finite differences.
                // when this test reports errors, first try to replace the table lookup with actual expf and test again to see if just that was the cause.
                // if only the table lookup causes gradients to differ this is acceptable.
} // (continued) closes the ndims loop of the "softmax" test begun above
        }

        // cross_entropy_loss
        {
            srand(seed);
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -0.1f, 0.1f);
                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
                // the second argument to cross_entropy_loss must sum up to 1 for each row,
                // so normalize every row of x[1] in place
                int nr = ggml_nrows(x[1]);
                int nc = ggml_nelements(x[1]) / nr;
                for (int ir = 0; ir < nr; ++ir) {
                    float sum = 0;
                    for (int ic = 0; ic < nc; ++ic) {
                        sum += ((float *) x[1]->data)[ic + ir*nc];
                    }
                    for (int ic = 0; ic < nc; ++ic) {
                        ((float *) x[1]->data)[ic + ir*nc] /= sum;
                    }
                }
                // only x[0] is differentiated; x[1] is the fixed target distribution
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_cross_entropy_loss(ctx0, x[0], x[1]);

                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-4f, 1e-3f, INFINITY);
            }
        }

        // rope f32
        {
            srand(seed);
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2; // force ne2[0] to be even, presumably required by ggml_rope -- confirm
            int n_rot = ne2[0];

            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);

                        // position ids: n_past, n_past+1, ...
                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
                        for (int i = 0; i < ne2[2]; ++i) {
                            ((int32_t *) p->data)[i] = n_past + i;
                        }

                        ggml_set_param(ctx0, x[0]);

                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation.
                            // so when other modes work, we assume that this does as well.
continue; // (continued) inside the skip_past branch of the "rope f32" test begun above
                        }

                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));

                        GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
                    }
                }
            }
        }

        // rope f16: same structure as "rope f32" above, but with f16 input and looser tolerances
        {
            srand(seed);
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2; // force ne2[0] to be even, presumably required by ggml_rope -- confirm
            int n_rot = ne2[0];

            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);

                        // position ids: n_past, n_past+1, ...
                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
                        for (int i = 0; i < ne2[2]; ++i) {
                            ((int32_t *) p->data)[i] = n_past + i;
                        }

                        ggml_set_param(ctx0, x[0]);

                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation.
                            // so when other modes work, we assume that this does as well.
continue; // (continued) inside the skip_past branch of the "rope f16" test begun above
                        }

                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));

                        GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
                    }
                }
            }
        }

        // flash_attn f32
        // TODO: adapt to ggml_flash_attn_ext() changes
        //{
        //    srand(seed);
        //    const int nargs = 3;

        //    int64_t ne2[4];

        //    get_random_dims(ne2, 4);
        //    int64_t D = ne2[0];
        //    int64_t N = ne2[1];
        //    int64_t M = ne2[2] + N;
        //    int64_t B = ne2[3];

        //    for (int masked = 0; masked <= 1; ++masked) {
        //        for (int ndims = 2; ndims <= 4; ++ndims) {
        //            int max_nrep = (ndims >= 3) ? 2 : 1;
        //            for (int nrep = 1; nrep < max_nrep; ++nrep) {
        //                int64_t neq[4] = { D, N, B*nrep, ne[3] };
        //                int64_t nek[4] = { D, M, B, ne[3] };
        //                int64_t nev[4] = { M, D, B, ne[3] };
        //                if (ndims == 2) {
        //                    neq[2] = 1; neq[3] = 1;
        //                    nek[2] = 1; nek[3] = 1;
        //                    nev[2] = 1; nev[3] = 1;
        //                } else if (ndims == 3) {
        //                    neq[3] = 1;
        //                    nek[3] = 1;
        //                    nev[3] = 1;
        //                }
        //                x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
        //                x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
        //                x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
        //                ggml_set_param(ctx0, x[0]);
        //                ggml_set_param(ctx0, x[1]);
        //                ggml_set_param(ctx0, x[2]);

        //                struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));

        //                check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY);
        //            }
        //        }
        //    }
        //}

        // release every tensor allocated for this iteration
        ggml_free(ctx0);
    }

    return 0;
}