// serialization.cpp
1 // todo-now: 2 // think more about how would loading_all_params interact with the GC list? 3 // simply using save_tensor / load_tensor likely will not correctly work with the GC 4 5 6 7 void save_tensor(tensor* t, FILE* f){ 8 9 // COPY_FROM_DEVICE returns a new tensor, which removes the other fields 10 // (besides t->data) -- but I want to preserve the name of the original param; 11 // I hesitate to just copy t->name in COPY_FROM_DEVICE, because this will result 12 // in two different tensors having the same name unless COPY_FROM_DEVICE 13 // immediately frees the cpu tensor without weighting for the GC to do it later; 14 // 15 // todo-high: longer term change COPY_FROM_DEVICE to modify t->data instead of returning a new tensor? 16 char* name = t->name; 17 18 if (t->device == CUDA){ 19 t = COPY_FROM_DEVICE(t); 20 } 21 22 // write / read in a specific order: 23 24 // (1) fixed sized 25 26 fwrite(&t->num_dims, sizeof(int), 1, f); 27 fwrite(&t->size, sizeof(int), 1, f); 28 fwrite(&t->is_leaf, sizeof(bool), 1, f); 29 30 // // not meaningful to save/load these fields: 31 // fwrite(&t->device, sizeof(int), 1, f); 32 // fwrite(&t->num_inputs, sizeof(int), 1, f); 33 // fwrite(&t->num_uses, sizeof(int), 1, f); 34 // fwrite(&t->_num_uses, sizeof(int), 1, f); 35 36 // (2) dynamic sized members 37 38 fwrite(t->data, sizeof(float), t->size, f); 39 fwrite(t->shape, sizeof(int), MAX_RANK, f); 40 fwrite(t->stride, sizeof(int), MAX_RANK, f); 41 fwrite(name, sizeof(char), MAX_TENSOR_NAME, f); 42 43 // // not meaningful to save/load these fields -- weight 44 // // is a leaf tensor, it does not have these fields: 45 // t->op_type; 46 // t->grad; 47 // t->scratch_space; 48 // t->inputs; 49 // t->non_grad_inputs; 50 51 // // not meaningful to save/load these -- function 52 // // pointers don't make sense when the program restarts 53 // t->grad_fn = NULL; 54 // t->backward = backward; 55 } 56 57 58 // this fn assumes exact order in which struct members were written to the file (inside 
"save_tensor" fn) 59 tensor* load_tensor(FILE* f){ 60 tensor* t = (tensor*)checkMallocErrors(malloc(sizeof(tensor))); 61 62 // (1) fixed sized: 63 64 fread(&t->num_dims, sizeof(int), 1, f); 65 fread(&t->size, sizeof(int), 1, f); 66 fread(&t->is_leaf, sizeof(bool), 1, f); 67 // fread(&t->device, sizeof(int), 1, f); 68 // fread(&t->op_type, sizeof(int), 1, f); 69 // fread(&t->num_inputs, sizeof(int), 1, f); 70 // fread(&t->num_uses, sizeof(int), 1, f); 71 // fread(&t->_num_uses, sizeof(int), 1, f); 72 73 // (2) dynamic sized members: 74 75 t->data = (float*)checkMallocErrors(malloc(sizeof(float) * t->size)); 76 fread(t->data, sizeof(float), t->size, f); 77 78 // question-now: I don't think need to allocate space for these -- memory 79 // for the array members is included when "malloc(sizeof(tensor))" above 80 fread(t->shape, sizeof(int), MAX_RANK, f); 81 fread(t->stride, sizeof(int), MAX_RANK, f); 82 83 t->name = (char*)checkMallocErrors(malloc(sizeof(char) * MAX_TENSOR_NAME)); 84 fread(t->name, sizeof(char), MAX_TENSOR_NAME, f); 85 86 // (3) set defaults 87 t->grad_fn = NULL; 88 t->grad = NULL; 89 t->backward = backward; 90 // COPY_TO_DEVICE expects CPU, sometimes in my constructors 91 // I don't set the device -- so it's some random value 92 t->device = CPU; 93 t->num_inputs = 0; 94 t->num_uses = 0; 95 96 // mv to device 97 98 // note: need to do the copying after done reading all 99 // the fields -- otherwise reading gets corrupted 100 if (DEVICE == CUDA){ 101 COPY_TO_DEVICE(t); 102 } 103 104 if (ferror(f)){ 105 t = NULL; 106 } 107 108 return t; 109 } 110 111 112 void save_param(param* p, FILE* f){ 113 114 // write / read in a specific order: 115 116 // (1) fixed sized 117 118 fwrite(&p->t, sizeof(int), 1, f); 119 fwrite(&p->beta1, sizeof(float), 1, f); 120 fwrite(&p->beta2, sizeof(float), 1, f); 121 fwrite(&p->epsilon, sizeof(float), 1, f); 122 123 // can't meaningfully save/restore these: 124 // p->next; 125 126 // (2) dynamic sized members 127 128 
save_tensor(p->value, f); 129 save_tensor(p->velocity, f); 130 save_tensor(p->first_moment, f); 131 save_tensor(p->second_moment, f); 132 } 133 134 135 param* load_param(FILE* f){ 136 137 param* p = (param*)checkMallocErrors(malloc(sizeof(param))); 138 139 fread(&p->t, sizeof(int), 1, f); 140 fread(&p->beta1, sizeof(float), 1, f); 141 fread(&p->beta2, sizeof(float), 1, f); 142 fread(&p->epsilon, sizeof(float), 1, f); 143 144 p->value = load_tensor(f); 145 p->velocity = load_tensor(f); 146 p->first_moment = load_tensor(f); 147 p->second_moment = load_tensor(f); 148 149 // set defaults 150 p->next = NULL; 151 152 if (ferror(f)){ 153 p = NULL; 154 } 155 156 return p; 157 } 158 159 160 void save_all_params(const char* prefix, int ep_idx){ 161 162 char path[50]; 163 snprintf(path, sizeof(char) * 50, "./generated/checkpoints/%s.dat", prefix); 164 165 // flush buffer 166 FILE *f = fopen(static_cast<const char*>(path), "w"); 167 if (!f) { 168 printf("Error opening file\n"); 169 exit(1); 170 } 171 fclose(f); 172 173 f = fopen(static_cast<const char*>(path), "a"); 174 175 // used in load_all_params to know how many times to iterate 176 int num_params = count_params(); 177 fwrite(&num_params, sizeof(int), 1, f); 178 179 float learning_rate = LR; 180 fwrite(&learning_rate, sizeof(float), 1, f); 181 182 fwrite(&ep_idx, sizeof(int), 1, f); 183 184 param* temp = param_head; 185 while (temp){ 186 save_param(temp, f); 187 temp = temp->next; 188 } 189 fclose(f); 190 191 printf("[save_all_params] Parameters saved.\n"); 192 } 193 194 195 int load_all_params(char* prefix){ 196 if (param_head){ 197 printf("[load_all_params] loading params when then param list is not empty -- is not supported\n"); 198 exit(1); 199 } 200 201 char path[50]; 202 snprintf(path, sizeof(char) * 50, "./generated/checkpoints/%s.dat", prefix); 203 204 FILE *f = fopen(static_cast<const char*>(path), "rb"); 205 if (!f) { 206 printf("Error opening file\n"); 207 exit(1); 208 } 209 210 int num_params; 211 
fread(&num_params, sizeof(int), 1, f); 212 213 // not used at the moment 214 float learning_rate; 215 fread(&learning_rate, sizeof(float), 1, f); 216 217 // need to return this to continue incrementing ep_idxs after loading 218 // (instead of starting again from 0), and avoid overwriting previous checkpoints 219 int ep_idx; 220 fread(&ep_idx, sizeof(int), 1, f); 221 222 param* loaded; 223 for (int i=0; i<num_params; i++){ 224 loaded = load_param(f); 225 printf("[load_all_params] loaded param for %s tensor\n", loaded->value->name); 226 loaded->next = param_head; 227 param_head = loaded; 228 } 229 fclose(f); 230 231 printf("[load_all_params] Parameters loaded.\n"); 232 return ep_idx; 233 }