// nn.h
// Core tensor type, autograd bookkeeping, and shared declarations.
// This header is included from multiple translation units (ops.cpp and
// main.cpp), so everything *defined* here must be safe to appear in each
// of them. It compiles both as C11 and as C++.

// include guards
#ifndef NN_H_INCLUDED
#define NN_H_INCLUDED

#include <assert.h>   // static_assert (macro in C11, keyword in C++)
#include <stdbool.h>  // bool/true/false for C; harmless in C++

#define MAX_RANK 4
#define MAX_INPUTS 3
#define MAX_TENSOR_NAME 20 // unique for each tensor
#define MAX_SCRATCH_SPACE 1

// todo: avoid needing to manually sync/increment op_type, NUM_OPS, VIS_COLORS when adding a new op
// - use graphviz's pastel19 or set312 color scheme ?
#define NUM_OPS 23

// 'static' gives each translation unit its own private copy of these tables.
// Without it, every file that includes this header emits an external
// definition of the arrays and the link fails with duplicate symbols.
static const char* OP_NAMES[] = {"add", "sub", "mul", "matmul", "pow", "reduce_sum", "relu", "transpose", "batched_matmul", "conv", "batched_conv", "maxpool", "batched_maxpool", "batched_flatten", "select", "log", "exp", "batched_reduce_sum", "repeat", "neg", "div", "reduce_max", "batched_reduce_max"};
static const char* VIS_COLORS[] = {"darkolivegreen1", "lightsalmon1", "skyblue1", "plum1", "mediumpurple1", "aquamarine", "yellow", "seashell", "orchid2", "deeppink1", "deeppink3", "darkseagreen1", "darkseagreen3", "beige", "bisque", "cornsilk", "darkolivegreen1", "tan1", "deepskyblue", "chocolate", "slateblue", "lemonchiffon", "lightgoldenrodyellow"};

// Fail the build (instead of reading past the tables at runtime) if either
// table falls out of sync with NUM_OPS — partially addresses the todo above.
static_assert(sizeof(OP_NAMES) / sizeof(OP_NAMES[0]) == NUM_OPS,
              "OP_NAMES must have exactly NUM_OPS entries");
static_assert(sizeof(VIS_COLORS) / sizeof(VIS_COLORS[0]) == NUM_OPS,
              "VIS_COLORS must have exactly NUM_OPS entries");

// Forward typedefs so the struct bodies below can name themselves without
// the 'struct' keyword in plain C (C++ already allows this implicitly).
typedef struct tensor tensor;
typedef struct param param;
typedef struct tuple tuple;

// tensor and its fn's are used in both ops.cpp and main.cpp
struct tensor {
    float* data;
    int shape[MAX_RANK];
    int device; // CPU or CUDA (see macros below); never 0

    // https://arxiv.org/pdf/1102.1523
    // Strides: the number of bytes to skip in memory to
    // proceed to the next element. For a (10, 10)
    // array of bytes, for example, the strides may be
    // (10, 1), in other words: proceed one byte to
    // get to the next column and ten bytes to locate
    // the next row.
    int stride[MAX_RANK];

    // rank
    int num_dims;

    // total element count; to avoid: int size = x->shape[0] * x->shape[1];
    int size;

    // for autograd engine:

    bool is_leaf;

    tensor* grad;
    // computes this tensor's contribution to its inputs' gradients
    // given the upstream gradient — NOTE(review): exact contract lives in
    // ops.cpp; confirm there.
    void (*grad_fn)(tensor* upstream, tensor* out);

    int num_inputs;
    tensor* inputs[MAX_INPUTS];
    // question-now: malloc memory for this?
    int non_grad_inputs[MAX_INPUTS];
    int num_uses; // for the AG engine: num outputs of this tensor left to call their out->grad_fn, before calling grad_fn on the current tensor
    int _num_uses; // working copy of num_uses, decremented during a backward pass

    // to store data recorded in fwd and used in bwd (wt needing to recompute it in bwd)
    // e.g. in relu, reduce_max, maxpool: idxs recorded during forward _k and used in its corresponding _bwd fn
    tensor* scratch_space[MAX_SCRATCH_SPACE];

    char* name;
    // use char as small int
    int op_type;

    void (*backward)(tensor* t);
};


// enum device {
//     cpu = 1,
//     cuda = 2,
//     // amd = 3,
//     // tpu = 4,
// };

// note: do not use 0 to denote a valid device, it conflicts with NULL pointer
// (e.g. checks like "t->device==CPU", if CPU is 0)
#define CPU 1
#define CUDA 2

// these two will only be used if DEVICE == CUDA
#define NUM_THREADS 16
#define CUDA_DEBUG false
#define DATA_COPY_DEBUG false

// weight initializers
void uniform_init(tensor*);
void kaiming_normal_init(tensor*);
void set_name(tensor*, const char*);
char* random_chars(int);

tensor* TensorLikeFill(tensor*, float);
tensor* TensorLike(tensor* t);
void* checkMallocErrors(void* ptr);

// printing / debugging helpers
void print(tensor*);
void lprint(tensor*);
void cuda_lprint(tensor*);
void sprint(tensor* t);

// Per-parameter optimizer state, stored as a singly linked list via 'next'.
struct param {
    tensor* value;
    // sgd:
    tensor* velocity;
    // adam:
    int t;          // timestep
    float beta1;
    float beta2;
    float epsilon;
    tensor* first_moment;
    tensor* second_moment;

    param* next;
};

// simple pair of floats
struct tuple {
    float item_1;
    float item_2;
};


tuple* get_tuple(float val1, float val2);

#endif // NN_H_INCLUDED