# network.py
import json
import random as _random
from typing import List, Tuple

from matrix import Matrix
from trainingdata import TrainingData


class Network:
    """A fully-connected feed-forward neural network trained with SGD.

    This is an abstract base: subclasses must provide the activation
    function, its derivative, and the input/output adapter methods
    declared at the bottom of the class.
    """

    def __init__(self, weights: List[Matrix], biases: List[Matrix]):
        """Build a network from explicit per-layer weights and biases.

        weights[i] maps layer-i activations to layer-(i+1) pre-activations;
        biases[i] is the matching column vector for layer i+1.
        """
        assert len(weights) == len(biases)
        self._weights = weights
        self._biases = biases
        self._layer_sizes = []

        self._update_layer_sizes()

    def _update_layer_sizes(self):
        """Derive layer sizes from the weight matrices, checking shape consistency."""
        # Input layer size is the column count of the first weight matrix.
        self._layer_sizes = [self._weights[0].cols()]
        for i in range(len(self._weights)):
            assert self._weights[i].rows() == self._biases[i].rows()
            assert self._layer_sizes[i] == self._weights[i].cols()
            assert self._biases[i].cols() == 1
            self._layer_sizes.append(self._weights[i].rows())

    @classmethod
    def random_network(cls, layer_sizes: List[int]):
        """Create a network of the given layer sizes with random weights/biases."""
        return cls([
            Matrix.random(layer_sizes[i + 1], layer_sizes[i])
            for i in range(len(layer_sizes) - 1)
        ], [
            Matrix.random(layer_sizes[i + 1], 1)
            for i in range(len(layer_sizes) - 1)
        ])

    def feedforward(self, input_data):
        """Propagate input through all layers and return the interpreted output."""
        input_data = self.prepare_input(input_data)
        assert input_data.cols() == 1 and input_data.rows() == self._layer_sizes[0]
        for w, b in zip(self._weights, self._biases):
            input_data = (w.dot(input_data) + b).apply_function(self.activation)
        return self.interpret_output(input_data)

    # FIX: annotation was the tuple instance (List[...], List[...]),
    # which is not a valid type hint — use typing.Tuple.
    def backpropagation(self, training_data: TrainingData) -> Tuple[List[Matrix], List[Matrix]]:
        """Return per-layer gradients (d_weights, d_biases) for one sample."""
        delta_w = [Matrix.filled_with(w.rows(), w.cols()) for w in self._weights]
        delta_b = [Matrix.filled_with(b.rows(), 1) for b in self._biases]

        layer = self.prepare_input(training_data.input_data)
        activations = [layer]  # activations per layer, including the input
        zs = []                # weighted inputs (pre-activations) per layer

        # feed forward, remembering every z and activation for the backward pass
        for w, b in zip(self._weights, self._biases):
            z = w.dot(layer) + b
            zs.append(z)
            layer = z.apply_function(self.activation)
            activations.append(layer)

        # propagate backwards: output-layer error first...
        expected = self.prepare_expected_output(training_data.expected_output)
        delta = (layer - expected) * zs[-1].apply_function(self.activation_derivative)
        delta_w[-1] = delta.dot(activations[-2].transpose())
        delta_b[-1] = delta
        # ...then push the error back through the hidden layers.
        for l in range(2, len(self._layer_sizes)):
            z = zs[-l]
            sp = z.apply_function(self.activation_derivative)
            delta = self._weights[-l + 1].transpose().dot(delta) * sp
            delta_w[-l] = delta.dot(activations[-l - 1].transpose())
            delta_b[-l] = delta
        return delta_w, delta_b

    def update_mini_batch(self, mini_batch: List[TrainingData], learn_rate: float):
        """Apply one gradient-descent step averaged over a mini-batch."""
        delta_w = [Matrix.filled_with(w.rows(), w.cols()) for w in self._weights]
        delta_b = [Matrix.filled_with(b.rows(), 1) for b in self._biases]

        # Accumulate gradients over every sample in the batch.
        for td in mini_batch:
            dw, db = self.backpropagation(td)
            delta_w = [x + y for x, y in zip(delta_w, dw)]
            delta_b = [x + y for x, y in zip(delta_b, db)]

        # Step size is the learn rate divided by the batch size (gradient average).
        step = learn_rate / len(mini_batch)
        self._weights = [w - dw * step for w, dw in zip(self._weights, delta_w)]
        self._biases = [b - db * step for b, db in zip(self._biases, delta_b)]

    def train(self, training_data: List[TrainingData], epochs: int, mini_batch_size: int, learn_rate: float,
              validation_data: List[TrainingData] = None, save_file: str = None):
        """Train with mini-batch SGD.

        If validation_data is given, accuracy is reported each epoch and the
        best-performing network is saved to save_file (when a path is given).
        Note: training_data is shuffled in place each epoch.
        """
        best_accuracy = 0
        if validation_data:
            best_accuracy = self.evaluate(validation_data)
            print(f"Epoch 0: {best_accuracy}")
        for epoch in range(epochs):
            _random.shuffle(training_data)
            for i in range(0, len(training_data), mini_batch_size):
                mini_batch = training_data[i:i + mini_batch_size]
                self.update_mini_batch(mini_batch, learn_rate)
            if validation_data:
                accuracy = self.evaluate(validation_data)
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    # FIX: save_file defaults to None; the original called
                    # save_to_file unconditionally and crashed on open(None).
                    if save_file:
                        self.save_to_file(save_file)
                print(f"Epoch {epoch + 1}: {accuracy}")
            else:
                print(f"Epoch {epoch + 1} complete")

    def save_to_file(self, filepath: str):
        """Serialize layer sizes, weights and biases as JSON to filepath."""
        with open(filepath, "w") as f:
            json.dump({
                "layer_sizes": self._layer_sizes,
                "weights": [w.get_matrix() for w in self._weights],
                "biases": [b.to_vector() for b in self._biases]
            }, f)
            # (explicit flush removed: the with-block closes and flushes the file)

    @classmethod
    def load_file(cls, filepath: str):
        """Deserialize a network previously written by save_to_file."""
        with open(filepath) as f:
            data = json.load(f)
        out = cls(
            [Matrix(w) for w in data["weights"]],
            [Matrix.from_vector(b) for b in data["biases"]]
        )
        # Cross-check the stored layer sizes against the reconstructed matrices.
        assert out._layer_sizes == data["layer_sizes"], "Invalid layer sizes"
        return out

    # abstract methods — subclasses must override these.
    # FIX: these previously `pass`ed (silently returning None), which hides
    # a missing override; raising makes the contract explicit.
    @staticmethod
    def activation(x: float) -> float:
        """Neuron activation function, applied element-wise."""
        raise NotImplementedError

    @staticmethod
    def activation_derivative(x: float) -> float:
        """Derivative of the activation function, applied element-wise."""
        raise NotImplementedError

    def prepare_expected_output(self, expected) -> Matrix:
        """Convert a raw expected-output label into a column Matrix."""
        raise NotImplementedError

    def prepare_input(self, inp) -> Matrix:
        """Convert raw input data into a column Matrix."""
        raise NotImplementedError

    def interpret_output(self, out: Matrix):
        """Convert the network's output Matrix into a caller-facing value."""
        raise NotImplementedError

    def evaluate(self, test_data: List[TrainingData]) -> float:
        """Return an accuracy score for the network on test_data."""
        raise NotImplementedError