# network.py
import json
import random as _random
from typing import List, Tuple

from matrix import Matrix
from trainingdata import TrainingData


class Network:
    """A fully-connected feed-forward neural network trained with SGD.

    This is an abstract base: subclasses must provide the activation
    function, its derivative, and the input/output adapter methods
    declared at the bottom of the class.
    """

    def __init__(self, weights: List[Matrix], biases: List[Matrix]):
        """Build a network from explicit per-layer weights and biases.

        weights[i] maps layer-i activations to layer-(i+1) pre-activations;
        biases[i] is the matching column vector for layer i+1.
        """
        assert len(weights) == len(biases)
        self._weights = weights
        self._biases = biases
        self._layer_sizes = []

        self._update_layer_sizes()

    def _update_layer_sizes(self):
        """Derive layer sizes from the weight matrices, checking shape consistency."""
        # Input layer size is the column count of the first weight matrix.
        self._layer_sizes = [self._weights[0].cols()]
        for i in range(len(self._weights)):
            assert self._weights[i].rows() == self._biases[i].rows()
            assert self._layer_sizes[i] == self._weights[i].cols()
            assert self._biases[i].cols() == 1
            self._layer_sizes.append(self._weights[i].rows())

    @classmethod
    def random_network(cls, layer_sizes: List[int]):
        """Create a network of the given layer sizes with random weights/biases."""
        return cls([
            Matrix.random(layer_sizes[i + 1], layer_sizes[i])
            for i in range(len(layer_sizes) - 1)
        ], [
            Matrix.random(layer_sizes[i + 1], 1)
            for i in range(len(layer_sizes) - 1)
        ])

    def feedforward(self, input_data):
        """Propagate input through all layers and return the interpreted output."""
        input_data = self.prepare_input(input_data)
        assert input_data.cols() == 1 and input_data.rows() == self._layer_sizes[0]
        for w, b in zip(self._weights, self._biases):
            input_data = (w.dot(input_data) + b).apply_function(self.activation)
        return self.interpret_output(input_data)

    # FIX: annotation was the tuple instance (List[...], List[...]),
    # which is not a valid type hint — use typing.Tuple.
    def backpropagation(self, training_data: TrainingData) -> Tuple[List[Matrix], List[Matrix]]:
        """Return per-layer gradients (d_weights, d_biases) for one sample."""
        delta_w = [Matrix.filled_with(w.rows(), w.cols()) for w in self._weights]
        delta_b = [Matrix.filled_with(b.rows(), 1) for b in self._biases]

        layer = self.prepare_input(training_data.input_data)
        activations = [layer]  # activations per layer, including the input
        zs = []                # weighted inputs (pre-activations) per layer

        # feed forward, remembering every z and activation for the backward pass
        for w, b in zip(self._weights, self._biases):
            z = w.dot(layer) + b
            zs.append(z)
            layer = z.apply_function(self.activation)
            activations.append(layer)

        # propagate backwards: output-layer error first...
        expected = self.prepare_expected_output(training_data.expected_output)
        delta = (layer - expected) * zs[-1].apply_function(self.activation_derivative)
        delta_w[-1] = delta.dot(activations[-2].transpose())
        delta_b[-1] = delta
        # ...then push the error back through the hidden layers.
        for l in range(2, len(self._layer_sizes)):
            z = zs[-l]
            sp = z.apply_function(self.activation_derivative)
            delta = self._weights[-l + 1].transpose().dot(delta) * sp
            delta_w[-l] = delta.dot(activations[-l - 1].transpose())
            delta_b[-l] = delta
        return delta_w, delta_b

    def update_mini_batch(self, mini_batch: List[TrainingData], learn_rate: float):
        """Apply one gradient-descent step averaged over a mini-batch."""
        delta_w = [Matrix.filled_with(w.rows(), w.cols()) for w in self._weights]
        delta_b = [Matrix.filled_with(b.rows(), 1) for b in self._biases]

        # Accumulate gradients over every sample in the batch.
        for td in mini_batch:
            dw, db = self.backpropagation(td)
            delta_w = [x + y for x, y in zip(delta_w, dw)]
            delta_b = [x + y for x, y in zip(delta_b, db)]

        # Step size is the learn rate divided by the batch size (gradient average).
        step = learn_rate / len(mini_batch)
        self._weights = [w - dw * step for w, dw in zip(self._weights, delta_w)]
        self._biases = [b - db * step for b, db in zip(self._biases, delta_b)]

    def train(self, training_data: List[TrainingData], epochs: int, mini_batch_size: int, learn_rate: float,
              validation_data: List[TrainingData] = None, save_file: str = None):
        """Train with mini-batch SGD.

        If validation_data is given, accuracy is reported each epoch and the
        best-performing network is saved to save_file (when a path is given).
        Note: training_data is shuffled in place each epoch.
        """
        best_accuracy = 0
        if validation_data:
            best_accuracy = self.evaluate(validation_data)
            print(f"Epoch 0: {best_accuracy}")
        for epoch in range(epochs):
            _random.shuffle(training_data)
            for i in range(0, len(training_data), mini_batch_size):
                mini_batch = training_data[i:i + mini_batch_size]
                self.update_mini_batch(mini_batch, learn_rate)
            if validation_data:
                accuracy = self.evaluate(validation_data)
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    # FIX: save_file defaults to None; the original called
                    # save_to_file unconditionally and crashed on open(None).
                    if save_file:
                        self.save_to_file(save_file)
                print(f"Epoch {epoch + 1}: {accuracy}")
            else:
                print(f"Epoch {epoch + 1} complete")

    def save_to_file(self, filepath: str):
        """Serialize layer sizes, weights and biases as JSON to filepath."""
        with open(filepath, "w") as f:
            json.dump({
                "layer_sizes": self._layer_sizes,
                "weights": [w.get_matrix() for w in self._weights],
                "biases": [b.to_vector() for b in self._biases]
            }, f)
            # (explicit flush removed: the with-block closes and flushes the file)

    @classmethod
    def load_file(cls, filepath: str):
        """Deserialize a network previously written by save_to_file."""
        with open(filepath) as f:
            data = json.load(f)
        out = cls(
            [Matrix(w) for w in data["weights"]],
            [Matrix.from_vector(b) for b in data["biases"]]
        )
        # Cross-check the stored layer sizes against the reconstructed matrices.
        assert out._layer_sizes == data["layer_sizes"], "Invalid layer sizes"
        return out

    # abstract methods — subclasses must override these.
    # FIX: these previously `pass`ed (silently returning None), which hides
    # a missing override; raising makes the contract explicit.
    @staticmethod
    def activation(x: float) -> float:
        """Neuron activation function, applied element-wise."""
        raise NotImplementedError

    @staticmethod
    def activation_derivative(x: float) -> float:
        """Derivative of the activation function, applied element-wise."""
        raise NotImplementedError

    def prepare_expected_output(self, expected) -> Matrix:
        """Convert a raw expected-output label into a column Matrix."""
        raise NotImplementedError

    def prepare_input(self, inp) -> Matrix:
        """Convert raw input data into a column Matrix."""
        raise NotImplementedError

    def interpret_output(self, out: Matrix):
        """Convert the network's output Matrix into a caller-facing value."""
        raise NotImplementedError

    def evaluate(self, test_data: List[TrainingData]) -> float:
        """Return an accuracy score for the network on test_data."""
        raise NotImplementedError