// init.cpp
#include "nn.h"

// standard headers used below (nn.h may already pull these in)
#include <cstdio>   // printf
#include <cstdlib>  // rand, exit
#include <cstring>  // strcmp
#include <cmath>    // sqrt

// uniform random numbers in [-0.5, 0.5]
void uniform_init(tensor* t) {
    float* dst = t->data;
    for (int i=0; i<t->size; i++) {
        // https://linux.die.net/man/3/random
        // rand() returns a pseudo-random int between 0 and RAND_MAX
        // normalize to: 0 - 1
        // shift to: -0.5 - 0.5

        // the (float) cast keeps this from truncating to 0: C promotes
        // RAND_MAX to float, so the division is floating-point, not integer
        dst[i] = ((float)rand() / RAND_MAX) - 0.5;
    }
}



// Mersenne Twister
#include <random>
// seed the C++ generator (std::random_device provides a non-deterministic seed)
std::mt19937 mt(std::random_device{}());
// std::normal_distribution is typically implemented with something like the
// Box-Muller transform or Marsaglia's polar method (implementation-defined)
std::normal_distribution<float> normal_dist{0.0, 1.0};
std::uniform_real_distribution<float> uniform_distribution(0.0, 1.0);
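// note: seeding from std::random_device makes each run different; for
// reproducible debugging one could seed with a fixed constant instead, e.g.:
//   std::mt19937 mt(42);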

void normal_init(tensor* t){
    for (int i=0; i<t->size; i++){
        // standard normal scaled down to stddev 0.1
        t->data[i] = (float)normal_dist(mt) * 0.1;
    }
}

// alternative uniform_init using the mt19937 generator:
// void uniform_init(tensor* t){
//     for (int i=0; i<t->size; i++){
//         t->data[i] = (float)uniform_distribution(mt) - 0.5;
//     }
// }



// https://github.com/pytorch/pytorch/blob/5802be698eff17cf4b6284056dc8e89c48befc00/torch/nn/init.py#L345
tuple* _calculate_fan_in_and_fan_out(tensor* t){
    if (t->num_dims < 2){
        printf("Fan in and fan out cannot be computed for tensor with fewer than 2 dimensions\n");
        exit(1);
    }

    int num_input_fmaps = t->shape[1];
    int num_output_fmaps = t->shape[0];
    int receptive_field_size = 1;

    if (t->num_dims > 2) {
        // all trailing dims (e.g. kernel height/width) form the receptive field
        for (int dim_idx=2; dim_idx<t->num_dims; dim_idx++){
            receptive_field_size *= t->shape[dim_idx];
        }
    }
    int fan_in = num_input_fmaps * receptive_field_size;
    int fan_out = num_output_fmaps * receptive_field_size;
    return get_tuple(fan_in, fan_out);
}
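
// worked example: for a conv weight of shape [16, 3, 5, 5],
// receptive_field_size = 5*5 = 25, so fan_in = 3*25 = 75 and fan_out = 16*25 = 400;
// for a 2-d linear weight of shape [out, in] it is just fan_in = in, fan_out = out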

// https://github.com/pytorch/pytorch/blob/78bff1e8c1bd0b30e27fbc79d5a14a1c5a92d4a7/torch/nn/init.py#L119C5-L120C30
float _calculate_gain(const char* nonlinearity){
    if (!nonlinearity) {
        return 1.0;
    } else if (strcmp(nonlinearity, "relu") == 0){
        return sqrt(2.0);
    } else {
        printf("[calculate_gain] Unsupported nonlinearity %s\n", nonlinearity);
        exit(1);
    }
}
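
// the sqrt(2) gain for relu comes from He et al. (2015): ReLU zeroes out
// roughly half of its inputs, halving the activation variance, so the weight
// variance is doubled (gain^2 = 2) to compensate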

// can't create a tensor inside this fn:
//  - causes infinite recursion -- this fn is called from the
//    constructor, and creating a tensor would call the constructor
//  - further, it would create a bunch of new tensors which don't get
//    added to GC (because they are created while initializing params -- IOW before the "gc_until" is set)

// todo-high:
//  - for now hardcoding the gain, but -- gain should be sqrt(2) if the layer is followed by a ReLU, or 1 if not
//  - all the intermediate tensors (e.g. created inside an op) will also go through
//    this initialization, which is probably undesirable (they probably don't have a relu after them)?

// todo-high: init bias any differently?
//  https://github.com/pytorch/pytorch/blob/a86fa779ce3482324a0d1fbb12d87a95a981f0a3/torch/nn/modules/linear.py#L114
// https://github.com/pytorch/pytorch/blob/78bff1e8c1bd0b30e27fbc79d5a14a1c5a92d4a7/torch/nn/init.py#L516-L518
void kaiming_uniform_init(tensor* t){
    float gain = _calculate_gain("relu");
    float fan_in = _calculate_fan_in_and_fan_out(t)->item_1;
    float stddev = gain / sqrt(fan_in);

    // calculate uniform bounds from the standard deviation:
    float bound = sqrt(3.0) * stddev;
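    //  - a uniform distribution on [-b, b] has variance b^2/3,
    //    so b = sqrt(3)*stddev yields variance stddev^2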

    // uses the more sophisticated mt19937 PRNG
    for (int i=0; i<t->size; i++){
        float uniform = uniform_distribution(mt); // ((float)mt() / mt.max());

        // transform from a uniform distribution on [0, 1) to one on [-bound, bound):
        //  - shift from [0, 1) to [-0.5, 0.5) by subtracting 0.5
        //  - scale by 2 * bound to spread to [-bound, bound)
        uniform = 2*bound * (uniform-0.5);
        t->data[i] = uniform;

        // // similar range as my uniform_init -- -0.5:0.5
        // uniform -= 0.5;
        // t->data[i] = uniform * stddev;
    }
}
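
// usage sketch (hypothetical helper name -- the actual tensor constructor in
// this codebase may differ):
//   tensor* w = tensor_create(...);  // e.g. a [out_features, in_features] weight
//   kaiming_uniform_init(w);         // fills w->data in place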


// https://github.com/pytorch/pytorch/blob/78bff1e8c1bd0b30e27fbc79d5a14a1c5a92d4a7/torch/nn/init.py#L521
void kaiming_normal_init(tensor* t){
    float gain = _calculate_gain("relu");
    float fan_in = _calculate_fan_in_and_fan_out(t)->item_1;
    float stddev = gain / sqrt(fan_in);

    // uses the more sophisticated mt19937 PRNG
    for (int i=0; i<t->size; i++){
        float normal = (float)normal_dist(mt);
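        // scale the standard normal: if X ~ N(0, 1) then stddev*X ~ N(0, stddev^2)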
        t->data[i] = normal * stddev;
    }
}