// gc.cpp
1 2 # define DEBUG_GC false 3 4 5 // placeholder macro, actual definition in backends/cuda/move_data 6 #define checkCudaErrors 7 8 9 // todo-high: 10 // for cpu backend, the number of tensors created, quickly reaches MAX_GC: e.g. 5,041,911 for BS=128 11 // set MAX_GC to 2**23 (8,388,608)? 12 // 13 // the ternary operator is not evaluated at pre-processor time 14 // #define MAX_GC (DEVICE == CUDA) ? 1024 : 2**23 15 #if DEVICE == CUDA 16 #define MAX_GC 1024 17 #elif DEVICE == CPU 18 #define MAX_GC 8388608 19 #endif 20 21 tensor* GC[MAX_GC]; 22 int GC_IDX = -1; 23 24 void add_to_gc(tensor* t){ 25 if (GC_IDX+1 >= MAX_GC){ 26 printf("Error: max GC len reached\n"); 27 exit(1); 28 } 29 GC[++GC_IDX] = t; 30 // note: bc add_to_gc runs before set_name, it's not meaningful to print t->name here (since it's a default random name) 31 // printf("[add_to_gc] GC_IDX: %i\n", GC_IDX); 32 } 33 34 void free_tensor(tensor* t){ 35 if (t == NULL){ 36 // printf("tensor has already been freed: cannot free\n"); 37 return; 38 } 39 if (t->device != CPU && t->device != CUDA){ 40 printf("[free_tensor] unexpected device for tensor %s\n", t->name); 41 exit(1); 42 } 43 44 // *** free cuda memory *** 45 46 // t->grad was also created with a tensor constructor, so it was separately added 47 // to the GC list thus should not free t->grad here (as part of freeing t), bc otherwise 48 // when GC reaches t->grad and tries to free it -- will result in a double free 49 // 50 // similarly, don't free t->inputs -- assume they tensors which were separately created by 51 // calling a tensor constructor, which means they are already in the GC list 52 // 53 // same for t->scratch_space 54 55 // *** free device data *** 56 57 // if (DEBUG){ 58 // printf("Freeing %s, GC_IDX: %i\n", t->name, GC_IDX); 59 // } 60 if (t->data != NULL){ 61 if (t->device==CUDA){ 62 checkCudaErrors(cudaFree(t->data)); 63 } else { 64 free(t->data); 65 } 66 t->data = NULL; 67 } 68 69 // *** free cpu memory *** 70 71 if (t->name != NULL){ 
72 free(t->name); 73 t->name = NULL; 74 } 75 76 // note: these are just pointers on the tensor struct, this will be deallocated with the struct itself 77 // (grad_fn, backward, shape, stride, device, num_dims, num_inputs, num_uses) 78 free(t); 79 t = NULL; 80 81 // todo-low: free all non_grad_inputs 82 } 83 84 void free_all_tensors(int idx_until){ 85 // idx_until to avoid free'in weights, and the "dataset" (though can free the "batch") 86 if (DEBUG_GC) printf("GC_IDX: %i\n", GC_IDX); 87 for (; GC_IDX>idx_until; GC_IDX--){ 88 free_tensor(GC[GC_IDX]); 89 } 90 }