# src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e


class LinearRegressionGradient:
    """Univariate linear regression fitted with batch gradient descent.

    The model is ``h(x) = theta[0] + theta[1] * x``. ``theta`` is a mutable
    two-element sequence that is updated in place while training; ``loss_``
    holds the most recently computed value of the cost function.
    """

    def __init__(self, theta=None):
        self.theta = theta
        self.loss_ = float("inf")

    def hypothesis(self, x):
        """Return the model prediction for a single input ``x``."""
        return self.theta[0] + self.theta[1] * x

    def loss(self, X, y):
        """Return the halved mean squared error of the current parameters.

        Computes ``1 / (2m) * sum((h(x_i) - y_i) ** 2)`` over the paired
        samples of ``X`` and ``y`` (expected to have the same length).
        Raises ``ZeroDivisionError`` when ``X`` is empty.
        """
        m = len(X)
        # The error is measured between the *prediction* and the target,
        # not between the raw feature and the target.
        squared_errors = (
            (self.hypothesis(x_i) - y_i) ** 2 for x_i, y_i in zip(X, y)
        )
        return sum(squared_errors) / (2 * m)

    def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
        """Fit the parameters by batch gradient descent.

        ``theta`` is updated in place and also becomes ``self.theta``, so the
        predictions made during training always use the parameters being
        optimised. The loss is printed every 200 iterations and ``loss_`` is
        refreshed once more after the final update.

        Args:
            X: sequence of feature values.
            y: sequence of target values, same length as ``X``.
            theta: mutable ``[intercept, slope]`` starting point.
            num_iter: number of gradient steps.
            alpha: learning rate.
        """
        # Bind the parameters being optimised to the model; otherwise
        # ``hypothesis`` would keep predicting with whatever ``self.theta``
        # was set at construction time (possibly ``None``).
        self.theta = theta
        m = len(X)
        for j in range(num_iter):
            # predict, then measure the error of every sample
            residuals = [self.hypothesis(x_i) - y_i for x_i, y_i in zip(X, y)]

            # compute slope, aka derivative with current params (theta);
            # both are computed before either parameter is touched so the
            # update is simultaneous
            deri_th0 = sum(residuals) / m
            deri_th1 = sum(r * x_i for r, x_i in zip(residuals, X)) / m

            # update parameters (moving against the gradient 'derivative')
            theta[0] = theta[0] - alpha * deri_th0
            theta[1] = theta[1] - alpha * deri_th1

            # report
            if j % 200 == 0:
                self.loss_ = self.loss(X, y)
                msg = f"loss: {self.loss_}"
                print(msg)

        # Make ``loss_`` reflect the final parameters rather than the last
        # reporting checkpoint.
        if num_iter > 0:
            self.loss_ = self.loss(X, y)


def app():
    """Render the Streamlit page: explanation on the left, live run on the right."""
    import streamlit as st

    def header():
        st.subheader("Linear Regression using Gradient Descent")
        desc = """> Plain Python (vanilla version) i.e. without importing any library"""
        st.markdown(desc)

    header()

    st1, st2 = st.columns(2)
    with st1:
        code_math()
    with st2:
        interactive_run()

    st.markdown(
        "> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
    )


def code_math():
    """Show the model's source code next to the formulas it implements."""
    import inspect

    import streamlit as st

    tex = st.latex
    write = st.write
    mark = st.write

    def codify(func):
        # Display the source of ``func`` as a highlighted Python snippet.
        st.code(inspect.getsource(func), language="python")

    cls = LinearRegressionGradient(theta=[0, 0])

    write("The class")
    codify(cls.__init__)

    write("the Hypothesis")
    tex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
    codify(cls.hypothesis)

    mark('The Loss/Objective/Cost function "_minimize_"')
    tex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
    codify(cls.loss)

    write("The Gradient Descent algorithm")
    mark("> repeat until converge {")
    tex(
        r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)} )"""
    )
    tex(
        r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"""
    )
    mark("> }")
    codify(cls.gradientDescent)


def interactive_run():
    """Generate a sample dataset, fit the model on it and plot the result."""
    import streamlit as st
    import numpy as np

    mark = st.markdown
    tex = st.latex

    def random_data(n=10):
        """Return ``(X, y)`` 1-D arrays of ``n`` noisy linear samples."""

        def sample_linear_regression_dataset(n):
            # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
            import numpy as np
            from sklearn import datasets
            import matplotlib.pyplot as plt  # for plotting

            x, y, _coef = datasets.make_regression(
                n_samples=n,  # number of samples
                n_features=1,  # number of features
                n_informative=1,  # number of useful features
                noise=40,  # standard deviation of the Gaussian noise
                coef=True,  # also return the true coefficient used to generate the data
                random_state=0,  # set for same data points for each run
            )

            # NOTE: rescaling to "years of experience" / "salary" ranges is
            # intentionally left disabled:
            # x = np.interp(x, (x.min(), x.max()), (0, 20))
            # y = np.interp(y, (y.min(), y.max()), (20000, 150000))

            plt.ion()  # interactive plot on
            plt.plot(x, y, ".", label="training data")
            plt.xlabel("Years of experience")
            plt.ylabel("Salary $")
            plt.title("Experience Vs. Salary")

            # Flatten both arrays to 1-D so samples can be indexed directly.
            X, y = x.reshape(x.shape[0],), y.reshape(
                y.shape[0],
            )
            return np.around(X, 2), np.around(y, 2)

        X_, y_ = sample_linear_regression_dataset(n)
        return X_, y_

    X, y = random_data()
    theta = [0, 0]  # initial values
    model = LinearRegressionGradient(theta)

    n = st.slider("Number of samples", min_value=10, max_value=200, step=10)
    # NOTE(review): the original indentation was lost; this assumes only the
    # data regeneration is conditional on the button and everything below
    # renders on every run with the default dataset otherwise - confirm.
    if st.button("generate new data and solve"):
        X, y = random_data(n=n)

    mark("_Input_")
    mark(f"_X_ = {X}")
    mark(f"_y_ = {y}")

    model.gradientDescent(X, y, theta)  # run to optimize thetas

    mark("_Solution_")
    tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print solution
    tex(f"loss = {model.loss_}")

    mark("> How to run")
    mark(
        """
```python
X, y = random_data()
theta = [0, 0]  # initial values
model = LinearRegressionGradient(theta)
model.gradientDescent(X, y, theta)  # run "i.e. optimize thetas"
# print solution
# print(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")
# print(f"loss = {model.loss_}")
```
"""
    )

    # -- visualize
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.scatter(X, y, label="Linear Relation")
    y_pred = theta[0] + theta[1] * np.array(X)
    ax.plot(X, y_pred)
    # NOTE(review): ``markevery=int`` passes the ``int`` type itself, which
    # looks unintended; left unchanged pending confirmation of the intent.
    ax.grid(color="black", linestyle="--", linewidth=0.5, markevery=int)
    ax.legend(loc=2)
    st.pyplot(fig)