// gc.cpp
 1  
 2  # define DEBUG_GC false
 3  
 4  
 5  // placeholder macro, actual definition in backends/cuda/move_data
 6  #define checkCudaErrors
 7  
 8  
 9  // todo-high:
10  //  for cpu backend, the number of tensors created, quickly reaches MAX_GC: e.g. 5,041,911 for BS=128
11  //  set MAX_GC to 2**23 (8,388,608)?
12  //
13  // the ternary operator is not evaluated at pre-processor time
14  // #define MAX_GC (DEVICE == CUDA) ? 1024 : 2**23
15  #if DEVICE == CUDA
16      #define MAX_GC 1024
17  #elif DEVICE == CPU
18      #define MAX_GC 8388608
19  #endif
20  
21  tensor* GC[MAX_GC];
22  int GC_IDX = -1;
23  
24  void add_to_gc(tensor* t){
25      if (GC_IDX+1 >= MAX_GC){
26          printf("Error: max GC len reached\n");
27          exit(1);
28      }
29      GC[++GC_IDX] = t;
30      // note: bc add_to_gc runs before set_name, it's not meaningful to print t->name here (since it's a default random name)
31      // printf("[add_to_gc] GC_IDX: %i\n", GC_IDX);
32  }
33  
34  void free_tensor(tensor* t){
35      if (t == NULL){
36          // printf("tensor has already been freed: cannot free\n");
37          return;
38      }
39      if (t->device != CPU && t->device != CUDA){
40          printf("[free_tensor] unexpected device for tensor %s\n", t->name);
41          exit(1);
42      }
43  
44      // *** free cuda memory ***
45  
46      // t->grad was also created with a tensor constructor, so it was separately added
47      // to the GC list thus should not free t->grad here (as part of freeing t), bc otherwise
48      // when GC reaches t->grad and tries to free it -- will result in a double free
49      //
50      // similarly, don't free t->inputs -- assume they tensors which were separately created by
51      // calling a tensor constructor, which means they are already in the GC list
52      //
53      // same for t->scratch_space
54  
55      // *** free device data ***
56  
57      // if (DEBUG){
58      //     printf("Freeing %s, GC_IDX: %i\n", t->name, GC_IDX);
59      // }
60      if (t->data != NULL){
61          if (t->device==CUDA){
62              checkCudaErrors(cudaFree(t->data));
63          } else {
64              free(t->data);
65          }
66          t->data = NULL;
67      }
68  
69      // *** free cpu memory ***
70  
71      if (t->name != NULL){
72          free(t->name);
73          t->name = NULL;
74      }
75  
76      // note: these are just pointers on the tensor struct, this will be deallocated with the struct itself
77      //  (grad_fn, backward, shape, stride, device, num_dims, num_inputs, num_uses)
78      free(t);
79      t = NULL;
80  
81      // todo-low: free all non_grad_inputs
82  }
83  
84  void free_all_tensors(int idx_until){
85      // idx_until to avoid free'in weights, and the "dataset" (though can free the "batch")
86      if (DEBUG_GC) printf("GC_IDX: %i\n", GC_IDX);
87      for (; GC_IDX>idx_until; GC_IDX--){
88          free_tensor(GC[GC_IDX]);
89      }
90  }