init.cpp
#include "nn.h"


// uniform random numbers in [-0.5, 0.5]
void uniform_init(tensor* t) {
    float* dst = t->data;
    for (int i=0; i<t->size; i++) {
        // https://linux.die.net/man/3/random
        // returns a pseudo-random int between 0 and RAND_MAX
        // normalize to [0, 1]
        // shift to [-0.5, 0.5]

        // not truncated to 0 by integer division: the (float) cast promotes the other operand too
        dst[i] = ((float)rand() / RAND_MAX) - 0.5;
    }
}



// Mersenne Twister
#include <random>
// Seed the C++ generator
std::mt19937 mt(std::random_device{}());
// Something like the Box-Muller method
std::normal_distribution<float> normal_dist{0.0, 1.0};
std::uniform_real_distribution<float> uniform_distribution(0.0, 1.0);

void normal_init(tensor* t){
    for (int i=0; i<t->size; i++){
        t->data[i] = (float)normal_dist(mt) * 0.1;
    }
}

// void uniform_init(tensor* t){
//     for (int i=0; i<t->size; i++){
//         t->data[i] = (float)uniform_distribution(mt) - 0.5;
//     }
// }




// https://github.com/pytorch/pytorch/blob/5802be698eff17cf4b6284056dc8e89c48befc00/torch/nn/init.py#L345
tuple* _calculate_fan_in_and_fan_out(tensor* t){
    if (t->num_dims < 2){
        printf("Fan in and fan out can not be computed for a tensor with fewer than 2 dimensions\n");
        exit(1);
    }

    int num_input_fmaps = t->shape[1];
    int num_output_fmaps = t->shape[0];
    int receptive_field_size = 1;

    if (t->num_dims > 2) {
        for (int dim_idx=2; dim_idx<t->num_dims; dim_idx++){
            int s = t->shape[dim_idx];
            // printf("iterating over s: %i\n", s);
            receptive_field_size *= s;
        }
    }
    int fan_in = num_input_fmaps * receptive_field_size;
    int fan_out = num_output_fmaps * receptive_field_size;
    return get_tuple(fan_in, fan_out);
}

// https://github.com/pytorch/pytorch/blob/78bff1e8c1bd0b30e27fbc79d5a14a1c5a92d4a7/torch/nn/init.py#L119C5-L120C30
float _calculate_gain(const char* nonlinearity){
    if (!nonlinearity) {
        return 1.0;
    } else if (strcmp(nonlinearity, "relu") == 0){
        return sqrt(2.0);
    } else {
        printf("[calculate_gain] Unsupported nonlinearity %s\n", nonlinearity);
        exit(1);
    }
}
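// Worked example (a sketch; the shapes below are hypothetical, not from nn.h):
// for a conv-style weight of shape [16, 3, 3, 3]
// (out_fmaps, in_fmaps, kernel_h, kernel_w), the receptive field is
// 3 * 3 = 9, so fan_in = 3 * 9 = 27 and fan_out = 16 * 9 = 144.
// For a 2D linear weight of shape [out_features, in_features] = [128, 64],
// receptive_field_size stays 1, so fan_in = 64 and fan_out = 128.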
// can't create a Tensor inside this fn:
// - causes infinite recursion -- this fn is called from the
//   constructor, and creating a tensor would call the constructor
// - further, it would create a bunch of new tensors which don't get
//   added to GC (bc they happen when initializing params -- IOW before the "gc_until" is set)

// todo-high:
// - for now hardcoding the gain, but -- gain should be 2 if the layer is followed by a ReLU, or 1 if not
// - all the intermediate tensors (e.g. created inside an op) will also go through
//   this initialization, which is probably undesirable (they probably don't have a relu after them)?

// todo-high: init bias any differently?
// https://github.com/pytorch/pytorch/blob/a86fa779ce3482324a0d1fbb12d87a95a981f0a3/torch/nn/modules/linear.py#L114

// https://github.com/pytorch/pytorch/blob/78bff1e8c1bd0b30e27fbc79d5a14a1c5a92d4a7/torch/nn/init.py#L516-L518
void kaiming_uniform_init(tensor* t){
    float gain = _calculate_gain("relu");
    float fan_in = _calculate_fan_in_and_fan_out(t)->item_1;
    float std = gain / sqrt(fan_in);

    // Calculate the uniform bound from the standard deviation:
    // a uniform distribution on [-bound, bound] has variance bound^2 / 3,
    // so bound = sqrt(3) * std gives the desired std
    float bound = sqrt(3.0) * std;

    // uses the more sophisticated mt19937 PRNG
    for (int i=0; i<t->size; i++){
        float uniform = uniform_distribution(mt); // ((float)mt() / mt.max());

        // transform a random variable from a uniform distribution on [0, 1] to a uniform distribution on [-bound, bound]
        // - shift from [0, 1] to [-0.5, 0.5] by subtracting 0.5
        // - scale by 2 * bound to spread to [-bound, bound]
        uniform = 2*bound * (uniform-0.5);
        t->data[i] = uniform;

        // // similar range as my uniform_init -- [-0.5, 0.5]
        // uniform -= 0.5;
        // t->data[i] = uniform * std;
    }
}


// https://github.com/pytorch/pytorch/blob/78bff1e8c1bd0b30e27fbc79d5a14a1c5a92d4a7/torch/nn/init.py#L521
void kaiming_normal_init(tensor* t){
    float gain = _calculate_gain("relu");
    float fan_in = _calculate_fan_in_and_fan_out(t)->item_1;
    float std = gain / sqrt(fan_in);

    // uses the more sophisticated mt19937 PRNG
    for (int i=0; i<t->size; i++){
        float normal = (float)normal_dist(mt);
        t->data[i] = normal * std;
    }
}
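
// Usage sketch (hypothetical; the tensor-construction API from nn.h is assumed
// rather than shown here): a linear layer's constructor would allocate its
// weight tensor w with shape [out_features, in_features] and then call
//     kaiming_uniform_init(w);
// so each weight is drawn from U[-bound, bound] with
// bound = sqrt(3) * sqrt(2) / sqrt(fan_in) = sqrt(6 / fan_in),
// which matches PyTorch's kaiming_uniform_ with nonlinearity="relu".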