Spaces:

azizalto
/

vanilla-ml-algorithms

Runtime error

App Files Files Community

vanilla-ml-algorithms / ml_algorithms /linear_regression_gradient_descent.py

azizalto

docs

b673599 over 3 years ago

raw

history blame

6.04 kB

	# src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e


	class LinearRegressionGradient:
	    """One-feature linear regression trained with batch gradient descent.

	    The model is ``h(x) = theta[0] + theta[1] * x``. Only plain Python
	    arithmetic is used, so any sequences of numbers can serve as data.

	    Attributes:
	        theta: Two-element mutable sequence ``[intercept, slope]``.
	        loss_: Most recently computed training loss (``inf`` before fitting).
	    """

	    def __init__(self, theta=None):
	        """Store the initial parameters; ``theta`` may be supplied later."""
	        self.theta = theta
	        self.loss_ = float("inf")

	    @staticmethod
	    def _sample_count(X, y):
	        """Return the number of samples, rejecting empty or mismatched data."""
	        m = len(X)
	        if m == 0:
	            raise ValueError("X and y must contain at least one sample")
	        if len(y) != m:
	            raise ValueError(
	                f"X and y must have the same length, got {m} and {len(y)}"
	            )
	        return m

	    def hypothesis(self, x):
	        """Return the prediction ``theta[0] + theta[1] * x`` for one input."""
	        return self.theta[0] + self.theta[1] * x

	    def loss(self, X, y):
	        """Return the halved mean squared error of the current parameters.

	        Computes ``1 / (2m) * sum((h(x_i) - y_i) ** 2)``.

	        Raises:
	            ValueError: If the data is empty or ``X`` and ``y`` differ in length.
	        """
	        m = self._sample_count(X, y)
	        # The error is measured between the *prediction* and the target,
	        # not between the raw feature and the target.
	        squared_errors = ((self.hypothesis(x_i) - y_i) ** 2 for x_i, y_i in zip(X, y))
	        return sum(squared_errors) / (2 * m)

	    def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
	        """Fit the parameters to ``(X, y)`` by batch gradient descent.

	        Args:
	            X: Sequence of feature values.
	            y: Sequence of target values, same length as ``X``.
	            theta: Two-element mutable sequence with the starting parameters.
	                It is updated in place and becomes ``self.theta``. ``None``
	                means "continue from the current ``self.theta``".
	            num_iter: Number of gradient steps.
	            alpha: Learning rate.

	        Raises:
	            ValueError: If the data is empty, ``X`` and ``y`` differ in
	                length, or no parameters are available.
	        """
	        if theta is None:
	            theta = self.theta
	        if theta is None:
	            raise ValueError("theta must be provided here or in the constructor")
	        m = self._sample_count(X, y)

	        # hypothesis() reads self.theta, so it must be the very object that is
	        # updated below; otherwise the predictions would never change.
	        self.theta = theta

	        for j in range(num_iter):
	            # predict, and keep the residuals used by both derivatives
	            residuals = [self.hypothesis(x_i) - y_i for x_i, y_i in zip(X, y)]

	            # compute slope, aka derivative with current params (theta);
	            # both are evaluated before either parameter is touched
	            deri_th0 = sum(residuals) / m
	            deri_th1 = sum(r * x_i for r, x_i in zip(residuals, X)) / m

	            # update parameters (moving against the gradient 'derivative')
	            theta[0] = theta[0] - alpha * deri_th0
	            theta[1] = theta[1] - alpha * deri_th1

	            # report
	            if j % 200 == 0:
	                self.loss_ = self.loss(X, y)
	                print(f"loss: {self.loss_}")

	        # Record the loss of the final parameters, not of the last report.
	        self.loss_ = self.loss(X, y)


	def app():
	    """Render the whole page: title, a two-column body, and a source link.

	    The left column shows the math and code listing (``code_math``); the
	    right column hosts the interactive demo (``interactive_run``).
	    """
	    import streamlit as st

	    # page header
	    st.subheader("Linear Regression using Gradient Descent")
	    st.markdown(
	        """> Plain Python (vanilla version) i.e. without importing any library"""
	    )

	    # body: explanation on the left, live demo on the right
	    left_col, right_col = st.columns(2)
	    with left_col:
	        code_math()
	    with right_col:
	        interactive_run()

	    # footer
	    source_note = "> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
	    st.markdown(source_note)


	def code_math():
	    """Show each formula of the algorithm next to the method implementing it.

	    Formulas are rendered with ``st.latex``; the source of each
	    ``LinearRegressionGradient`` method is fetched with ``inspect.getsource``
	    and shown with ``st.code``.
	    """
	    import inspect
	    import streamlit as st

	    tex = st.latex
	    write = st.write
	    mark = st.write

	    def codify(func):
	        """Display the source code of ``func`` as a Python code block."""
	        return st.code(inspect.getsource(func), language="python")

	    # An instance is only needed to reach the bound methods for getsource().
	    cls = LinearRegressionGradient(theta=[0, 0])

	    write("The class")
	    codify(cls.__init__)

	    write("the Hypothesis")
	    tex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
	    codify(cls.hypothesis)

	    mark('The Loss/Objective/Cost function "_minimize_"')
	    tex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
	    codify(cls.loss)

	    write("The Gradient Descent algorithm")
	    mark("> repeat until converge {")
	    tex(
	        r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)} )"""
	    )
	    # The trailing ")" of the original formula had no matching "(" and was
	    # rendered as a stray parenthesis; it is dropped here.
	    tex(
	        r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"""
	    )
	    mark("> }")
	    codify(cls.gradientDescent)


	def interactive_run():
	    """Render the interactive demo: pick a sample size, fit, and plot.

	    A slider selects the number of samples. Pressing the button generates a
	    dataset of that size, runs gradient descent on it and prints the inputs,
	    the fitted line and the loss. The data and the current line are then
	    drawn with matplotlib.
	    """
	    import matplotlib.pyplot as plt
	    import numpy as np
	    import streamlit as st

	    mark = st.markdown
	    tex = st.latex

	    def random_data(n=10):
	        """Return ``(X, y)`` as 1-D arrays of ``n`` noisy linear samples, rounded to 2 decimals."""
	        # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
	        from sklearn import datasets

	        features, targets, _coef = datasets.make_regression(
	            n_samples=n,  # number of samples
	            n_features=1,  # number of features
	            n_informative=1,  # number of useful features
	            noise=40,  # standard deviation of the Gaussian noise
	            coef=True,  # also return the true coefficient (unused here)
	            random_state=0,  # fixed seed: same data points for each run
	        )
	        # The original also drew the points on pyplot's implicit global figure
	        # (plt.ion()/plt.plot/...). That figure was never displayed or closed,
	        # so it only accumulated artists on every rerun; it is omitted here.

	        # Flatten both arrays to shape (n_samples,).
	        flat_x = features.reshape(features.shape[0])
	        flat_y = targets.reshape(targets.shape[0])
	        return np.around(flat_x, 2), np.around(flat_y, 2)

	    X, y = random_data()  # default dataset, plotted until the button is pressed
	    theta = [0, 0]  # initial values
	    model = LinearRegressionGradient(theta)
	    n = st.slider("Number of samples", min_value=10, max_value=200, step=10)

	    # NOTE(review): the original indentation was lost, so the extent of this
	    # branch is reconstructed. Generating, solving and reporting are assumed
	    # to happen on click; the usage note and the plot are assumed to render
	    # always (otherwise the default dataset above would be unused). Confirm.
	    if st.button("generate new data and solve"):
	        X, y = random_data(n=n)
	        mark("_Input_")
	        mark(f"_X_ = {X}")
	        mark(f"_y_ = {y}")
	        model.gradientDescent(X, y, theta)  # run to optimize thetas
	        mark("_Solution_")
	        tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print solution
	        tex(f"loss = {model.loss_}")

	    mark("> How to run")
	    mark(
	        """
	        ```python
	        X, y = random_data()
	        theta = [0, 0] # initial values
	        model = LinearRegressionGradient(theta)
	        model.gradientDescent(X, y, theta) # run "i.e. optimize thetas"
	        # print solution
	        # print(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")
	        # print(f"loss = {model.loss_}")
	        ```
	        """
	    )

	    # -- visualize
	    fig, ax = plt.subplots()
	    ax.scatter(X, y, label="Linear Relation")
	    # Read the parameters from the model so the line always matches the fit.
	    y_pred = model.theta[0] + model.theta[1] * np.array(X)
	    ax.plot(X, y_pred)
	    # `markevery=int` (the builtin type) was passed here originally; it is not
	    # a meaningful value and grid lines have no markers, so it is dropped.
	    ax.grid(color="black", linestyle="--", linewidth=0.5)
	    ax.legend(loc=2)
	    st.pyplot(fig)
	    # pyplot keeps every figure alive until it is closed; release this one so
	    # repeated reruns do not pile up figures in memory.
	    plt.close(fig)