"""
Contain the implementation of a simple neural network
Author: Son Phat Tran
"""
import numpy as np
from utils import sigmoid, sigmoid_derivative
| |
|
| |
|
class ConventionalNeuralNetwork:
    """
    A simple two-layer fully-connected neural network.

    NOTE:
        - The network does not include any bias b
        - The hidden layer uses the sigmoid activation function
        - The output layer is linear (a single scalar output per sample)
    """

    def __init__(self, input_size, hidden_size):
        """
        Create a two-layer neural network.

        :param input_size: size of the input vector
        :param hidden_size: size of the hidden layer
        """
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Weight matrices drawn from a standard normal distribution.
        # W1 maps input -> hidden, W2 maps hidden -> scalar output.
        self.W1 = np.random.normal(size=(self.input_size, self.hidden_size))
        self.W2 = np.random.normal(size=(self.hidden_size, 1))

        # Cache of intermediate activations, filled by forward()
        # and consumed by backward().
        self.cache = {}

    def forward(self, x_train, y_train):
        """
        Perform the forward pass and compute the mean squared error loss.

        :param x_train: training inputs, shape (n_samples, input_size)
        :param y_train: training targets, shape (n_samples,) or (n_samples, 1)
        :return: tuple (output, score) where output has shape (n_samples, 1)
                 and score = (1/2) * mean((y - output) ** 2)
        """
        # Hidden layer: linear transform followed by sigmoid.
        a1 = x_train @ self.W1
        z1 = sigmoid(a1)

        # Output layer: linear, no activation.
        a2 = z1 @ self.W2

        # Cache everything backward() needs.
        self.cache = {
            "x_train": x_train,
            "y_train": y_train,
            "a1": a1,
            "z1": z1,
            "a2": a2
        }

        # Mean squared error with the conventional 1/2 factor,
        # averaged over the batch.
        score = (1 / 2) * np.sum((y_train.reshape(-1) - a2.reshape(-1)) ** 2) / y_train.shape[0]
        return a2, score

    def predict(self, x_test):
        """
        Perform the prediction on unseen data.

        :param x_test: test inputs, shape (n_samples, input_size)
        :return: network outputs, shape (n_samples, 1)
        """
        a1 = x_test @ self.W1
        z1 = sigmoid(a1)
        a2 = z1 @ self.W2
        return a2

    def backward(self, learning_rate):
        """
        Perform back-propagation using the values cached by forward()
        and apply one gradient-descent step to W1 and W2.

        :param learning_rate: learning rate of back-propagation
        :return: None
        """
        x_train, y_train, a1, z1, a2 = self.cache["x_train"], self.cache["y_train"], \
            self.cache["a1"], self.cache["z1"], self.cache["a2"]

        # BUG FIX: forward() averages the loss over the batch (divides by
        # n_samples), so the gradient must be averaged as well. The original
        # code used the un-normalised sum gradient, which silently scales
        # the effective learning rate by the batch size.
        n_samples = y_train.shape[0]

        # dL/da2 for L = (1/2) * mean((y - a2) ** 2)
        d_a2 = (a2 - y_train.reshape(-1, 1)).reshape(-1, 1) / n_samples

        # Back-propagate through the linear output layer.
        d_z1 = d_a2 @ self.W2.T
        d_W2 = z1.T @ d_a2

        # Back-propagate through the hidden layer's sigmoid.
        d_a1 = d_z1 * sigmoid_derivative(a1)
        d_W1 = x_train.T @ d_a1

        # Gradient-descent update.
        self.W1 -= learning_rate * d_W1
        self.W2 -= learning_rate * d_W2
| |
|
| |
|
| |
|
| |
|
| |
|