// autograd.cpp
#include <deque> // deque from the standard template library (STL)
#include "nn.h"

#define IS_DEBUG_AG false
#define IS_DEBUG_BRANCHES false


void backward(tensor* loss){

    // open and close in write mode to clear the file; doing it here avoids needing to
    // modify the interface of lprint (which in general should NOT clear the file)
    // fclose(fopen("./generated/log.txt", "w"));

    if (!loss->grad_fn) {
        printf("[autograd engine] Error: tensor has no grad_fn\n");
        exit(1);
    }

    // todo: in autograd, don't overwrite grad, accumulate with += instead

    // allocating grad buffers in ops previously led to an error: the grad on the last
    // node is set to start the backprop loop ("*e->grad = 1.0;"), but because the buffer
    // for grad is only allocated inside an Op, and e is never used by an op (e is the
    // last node in the computational graph), "*e->grad = 1.0;" is illegal -- the buffer
    // hasn't been allocated.
    // - one way to fix this is to allocate the grad buffer for all tensors in the Tensor constructor
    // - however, I do like that grad buffers are lazily created only when a tensor is used,
    //   which amounts to creating them here (in ops)

    // for the check below to pass
    loss->num_uses = 0;
    loss->grad = TensorLikeFill(loss, 1.0);

    std::deque<tensor*> ready;
    ready.push_front(loss);
    while (ready.size() > 0) {
        tensor* t = ready.back(); ready.pop_back();

        if (t->num_uses != 0){
            // not all of t's consumers have run their backward yet, so t->grad is still
            // incomplete -- push the same node back and try again later
            // (see the toy trace at the bottom of this file)
            ready.push_front(t);

            if (IS_DEBUG_BRANCHES){
                printf("[autograd engine] pushed again %s (num_uses: %i)\n", t->name, t->num_uses);
                if (t->num_uses == -1) // went negative -- something is off, stop here in debug mode
                    return;
            }
            continue;
        }

        if (IS_DEBUG_BRANCHES)
            printf("[autograd engine] done %s's grad\n", t->name);

        const char* op_name = OP_NAMES[t->op_type];
        if (IS_DEBUG_AG)
            printf("[autograd engine] %s\n", op_name);

        // when a tensor is used by multiple ops, it's incorrect to access t->grad or to call
        // t->grad_fn until grad_fn has been called for every op that consumes t
        // (i.e. until t->num_uses has dropped to 0)

        if (!t->grad_fn || !t->grad) {
            printf("[autograd engine] Error: tensor has no grad_fn\n");
            exit(1);
        }

        // each input of this op will have this as an upstream grad
        tensor* upstream = t->grad;
        // step once for the op -- propagates grad wrt all inputs of the op
        t->grad_fn(upstream, t);

        if (IS_DEBUG_AG)
            printf("[autograd engine] ran %s backward fn\n", t->name);

        for (int i=0; i<t->num_inputs; i++){
            tensor* inp = t->inputs[i];

            // this condition can be used if we decide not to compute ->grad
            // for some of the inputs to an op (e.g. idx->grad in select_bwd)
            // printf("[autograd engine] pushing: %s\n", inp->name);
            // if (!inp->grad){
            //     printf("[autograd engine] %s has no ->grad field\n", inp->name);
            //     continue;
            // }

            // avoid pushing the same node twice when it is reachable from several ops, e.g.
            //        exp
            //       /   \
            //      x1   x2
            //
            // check if this node is already on the queue: scan the whole deque and
            // compare the tensor pointers themselves (same node == same object)
            bool is_pushed = false;
            for (size_t ii=0; ii<ready.size(); ii++){
                if (ready.at(ii) == inp){
                    is_pushed = true;
                    break;
                }
            }

            // leaf tensors have no grad_fn, so don't push them on the queue:
            // every value popped from the queue at a later iteration has its
            // grad_fn called
            if (!inp->is_leaf && !is_pushed) {
                ready.push_front(inp);
            }

            // we just called the grad_fn of one of the consumers (t) of this tensor (inp)
            inp->num_uses--;

            // printf("\n%s's grad", inp->name);

            if (IS_DEBUG_AG){
                char buffer[30];
                snprintf(buffer, sizeof(buffer), "%s_grad", inp->name); // bounded, unlike sprintf
                set_name(inp->grad, buffer);
                lprint(inp->grad);
            }
        }
    }
}
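
// ---------------------------------------------------------------------------
// Illustration only (not called anywhere in the library): a minimal toy trace
// of the ready-queue scheduling used by backward() above. The toy_node struct
// and the graph in toy_demo() are hypothetical stand-ins for tensor/nn.h ops;
// they only show how num_uses defers a node until all of its consumers have
// run their backward functions -- the same effect as a topological ordering,
// obtained lazily by re-queueing instead of sorting up front. Relies on the
// <deque> include at the top of this file and on printf coming in via nn.h.
// ---------------------------------------------------------------------------
struct toy_node {
    const char* name;
    int num_uses;          // consumers that have not run backward yet
    bool is_leaf;
    int num_inputs;
    toy_node* inputs[2];
};

static void toy_backward(toy_node* loss){
    std::deque<toy_node*> ready;
    loss->num_uses = 0;
    ready.push_front(loss);

    while (ready.size() > 0) {
        toy_node* t = ready.back(); ready.pop_back();
        if (t->num_uses != 0){
            ready.push_front(t);           // grad still incomplete -- retry later
            continue;
        }
        printf("[toy] backward through %s\n", t->name);
        for (int i=0; i<t->num_inputs; i++){
            toy_node* inp = t->inputs[i];
            bool is_pushed = false;
            for (size_t ii=0; ii<ready.size(); ii++)
                if (ready.at(ii) == inp) { is_pushed = true; break; }
            if (!inp->is_leaf && !is_pushed)
                ready.push_front(inp);
            inp->num_uses--;               // one of inp's consumers is now done
        }
    }
}

// Hypothetical graph: h = op(w); g = op(h); loss = op(h, g).
// h is consumed twice, so it gets popped once while num_uses is still 1,
// re-queued, and only processed after g's backward has run. Calling
// toy_demo() from a scratch main() prints:
//   [toy] backward through loss
//   [toy] backward through g
//   [toy] backward through h
static void toy_demo(){
    toy_node w    = {"w",    1, true,  0, {nullptr, nullptr}};
    toy_node h    = {"h",    2, false, 1, {&w, nullptr}};
    toy_node g    = {"g",    1, false, 1, {&h, nullptr}};
    toy_node loss = {"loss", 0, false, 2, {&h, &g}};
    toy_backward(&loss);
}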