"""
Contain the implementation of a simple neural network
Author: Son Phat Tran
"""
import numpy as np
from utils import sigmoid, sigmoid_derivative
| |
|
| |
|
class ConventionalNeuralNetwork:
    """
    A simple two-layer fully-connected neural network.

    NOTE:
        - The network does not include any bias b
        - The hidden layer uses the sigmoid activation function
        - The output layer is linear (a single scalar output per sample)
    """

    def __init__(self, input_size, hidden_size):
        """
        Create a two-layer neural network.

        :param input_size: size of the input vector
        :param hidden_size: size of the hidden layer
        """
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Weight matrices drawn from a standard normal distribution.
        # W1 maps input -> hidden, W2 maps hidden -> scalar output.
        self.W1 = np.random.normal(size=(self.input_size, self.hidden_size))
        self.W2 = np.random.normal(size=(self.hidden_size, 1))

        # Cache of intermediate activations, filled by forward()
        # and consumed by backward().
        self.cache = {}

    def forward(self, x_train, y_train):
        """
        Perform the forward pass and compute the mean squared error loss.

        :param x_train: training inputs, shape (n_samples, input_size)
        :param y_train: training targets, shape (n_samples,) or (n_samples, 1)
        :return: tuple (output, score) where output has shape (n_samples, 1)
                 and score = (1/2) * mean((y - output) ** 2)
        """
        # Hidden layer: linear transform followed by sigmoid.
        a1 = x_train @ self.W1
        z1 = sigmoid(a1)

        # Output layer: linear, no activation.
        a2 = z1 @ self.W2

        # Cache everything backward() needs.
        self.cache = {
            "x_train": x_train,
            "y_train": y_train,
            "a1": a1,
            "z1": z1,
            "a2": a2
        }

        # Mean squared error with the conventional 1/2 factor,
        # averaged over the batch.
        score = (1 / 2) * np.sum((y_train.reshape(-1) - a2.reshape(-1)) ** 2) / y_train.shape[0]
        return a2, score

    def predict(self, x_test):
        """
        Perform the prediction on unseen data.

        :param x_test: test inputs, shape (n_samples, input_size)
        :return: network outputs, shape (n_samples, 1)
        """
        a1 = x_test @ self.W1
        z1 = sigmoid(a1)
        a2 = z1 @ self.W2
        return a2

    def backward(self, learning_rate):
        """
        Perform back-propagation using the values cached by forward()
        and apply one gradient-descent step to W1 and W2.

        :param learning_rate: learning rate of back-propagation
        :return: None
        """
        x_train, y_train, a1, z1, a2 = self.cache["x_train"], self.cache["y_train"], \
            self.cache["a1"], self.cache["z1"], self.cache["a2"]

        # BUG FIX: forward() averages the loss over the batch (divides by
        # n_samples), so the gradient must be averaged as well. The original
        # code used the un-normalised sum gradient, which silently scales
        # the effective learning rate by the batch size.
        n_samples = y_train.shape[0]

        # dL/da2 for L = (1/2) * mean((y - a2) ** 2)
        d_a2 = (a2 - y_train.reshape(-1, 1)).reshape(-1, 1) / n_samples

        # Back-propagate through the linear output layer.
        d_z1 = d_a2 @ self.W2.T
        d_W2 = z1.T @ d_a2

        # Back-propagate through the hidden layer's sigmoid.
        d_a1 = d_z1 * sigmoid_derivative(a1)
        d_W1 = x_train.T @ d_a1

        # Gradient-descent update.
        self.W1 -= learning_rate * d_W1
        self.W2 -= learning_rate * d_W2
| |
|
| |
|
| |
|
| |
|
| |
|