Spaces:
Runtime error
Runtime error
File size: 6,038 Bytes
c5301d0 c5d63f2 c5301d0 b673599 c5301d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e
class LinearRegressionGradient:
    """Univariate linear regression fitted with batch gradient descent.

    The model is ``h(x) = theta[0] + theta[1] * x``. ``theta`` is a mutable
    two-element sequence ``[intercept, slope]``; ``loss_`` holds the most
    recently computed loss and is ``inf`` until a fit has run.
    """

    def __init__(self, theta=None):
        """Store the initial parameters ``[intercept, slope]``."""
        self.theta = theta
        self.loss_ = float("inf")

    def hypothesis(self, x):
        """Return the prediction ``theta[0] + theta[1] * x`` for input ``x``."""
        return self.theta[0] + self.theta[1] * x

    def loss(self, X, y):
        """Return the halved mean squared error of the predictions on ``X``.

        Computes ``sum((h(x_i) - y_i) ** 2) / (2 * m)`` with ``m = len(X)``,
        where ``h`` is ``hypothesis`` evaluated with the current ``theta``.
        """
        m = len(X)
        squared_errors = (
            (self.hypothesis(x_i) - y_i) ** 2 for x_i, y_i in zip(X, y)
        )
        return sum(squared_errors) / (2 * m)

    def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
        """Fit the parameters to ``(X, y)`` by batch gradient descent.

        Args:
            X: Sequence of input values.
            y: Sequence of target values, aligned with ``X``.
            theta: Mutable ``[intercept, slope]`` start values. The sequence
                is updated in place and becomes ``self.theta``.
            num_iter: Number of gradient steps to take.
            alpha: Learning rate (step size).

        Prints the loss every 200 iterations. When at least one step was
        taken, ``loss_`` ends up holding the loss of the final parameters.
        """
        # Predictions and the loss are computed from self.theta, so the model
        # has to optimise the very sequence it was handed here.
        self.theta = theta
        m = len(X)
        for j in range(num_iter):
            # Residuals of the current predictions.
            residuals = [self.hypothesis(x_i) - y_i for x_i, y_i in zip(X, y)]
            # Partial derivatives of the loss w.r.t. intercept and slope.
            # Both are computed before either parameter is touched, so the
            # update below is simultaneous.
            deri_th0 = sum(residuals) / m
            deri_th1 = sum(r * x_i for r, x_i in zip(residuals, X)) / m
            # Move against the gradient.
            theta[0] -= alpha * deri_th0
            theta[1] -= alpha * deri_th1
            # Periodic progress report.
            if j % 200 == 0:
                self.loss_ = self.loss(X, y)
                print(f"loss: {self.loss_}")
        if num_iter > 0:
            # The periodic report lags behind the last step; refresh so that
            # loss_ matches the parameters actually returned.
            self.loss_ = self.loss(X, y)
def app():
    """Render the page: heading, two side-by-side columns, and a source link.

    The left column shows the annotated math and code (``code_math``); the
    right column hosts the interactive demo (``interactive_run``).
    """
    import streamlit as st

    # Page heading and one-line description.
    st.subheader("Linear Regression using Gradient Descent")
    st.markdown(
        """> Plain Python (vanilla version) i.e. without importing any library"""
    )

    left_col, right_col = st.columns(2)
    with left_col:
        code_math()
    with right_col:
        interactive_run()

    source_note = "> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
    st.markdown(source_note)
def code_math():
    """Show each part of ``LinearRegressionGradient`` next to its formula.

    For the constructor, the hypothesis, the loss and the gradient-descent
    update, this writes a caption, the LaTeX formula (where there is one) and
    the method's source code as returned by ``inspect.getsource``.
    """
    import inspect

    import streamlit as st

    tex = st.latex
    write = st.write

    def codify(func):
        """Display the source code of ``func`` as a Python code block."""
        st.code(inspect.getsource(func), language="python")

    # An instance is only needed to reach the bound methods for display.
    model = LinearRegressionGradient(theta=[0, 0])

    write("The class")
    codify(model.__init__)

    write("the Hypothesis")
    tex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
    codify(model.hypothesis)

    write('The Loss/Objective/Cost function "_minimize_"')
    tex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
    codify(model.loss)

    write("The Gradient Descent algorithm")
    write("> repeat until converge {")
    tex(
        r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)} )"""
    )
    # The residual is multiplied by x^{(i)}; parentheses are balanced.
    tex(
        r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"""
    )
    write("> }")
    codify(model.gradientDescent)
def interactive_run():
    """Render the interactive demo: sample data, fit on demand, plot the fit.

    A slider selects the number of samples. Pressing the button regenerates
    the data with that size, runs gradient descent on it and shows the
    inputs, the fitted line equation and the loss. A usage snippet and a
    scatter plot with the current line are always rendered.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import streamlit as st

    mark = st.markdown
    tex = st.latex

    def random_data(n=10):
        """Return ``n`` noisy single-feature regression samples as ``(X, y)``.

        Both arrays are flattened to one dimension and rounded to two
        decimals. The generator is seeded, so a given ``n`` always yields
        the same points.
        """
        # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
        from sklearn import datasets

        x, y, _coef = datasets.make_regression(
            n_samples=n,  # number of samples
            n_features=1,  # number of features
            n_informative=1,  # number of useful features
            noise=40,  # standard deviation of the Gaussian noise
            coef=True,  # also return the true coefficient (unused here)
            random_state=0,  # fixed seed: same data points on every run
        )
        # No plotting here: drawing on pyplot's implicit global figure would
        # pile up artists on every rerun without ever being displayed.
        return np.around(x.reshape(-1), 2), np.around(y.reshape(-1), 2)

    X, y = random_data()
    theta = [0, 0]  # initial values
    model = LinearRegressionGradient(theta)

    n = st.slider("Number of samples", min_value=10, max_value=200, step=10)
    # NOTE(review): the original indentation was lost; this branch is assumed
    # to span data generation through the loss display -- confirm.
    if st.button("generate new data and solve"):
        X, y = random_data(n=n)
        mark("_Input_")
        mark(f"_X_ = {X}")
        mark(f"_y_ = {y}")
        model.gradientDescent(X, y, theta)  # run to optimize thetas
        mark("_Solution_")
        tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print solution
        tex(f"loss = {model.loss_}")

    mark("> How to run")
    mark(
        """
```python
X, y = random_data()
theta = [0, 0] # initial values
model = LinearRegressionGradient(theta)
model.gradientDescent(X, y, theta) # run "i.e. optimize thetas"
# print solution
# print(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")
# print(f"loss = {model.loss_}")
```
"""
    )

    # -- visualize
    fig, ax = plt.subplots()
    ax.scatter(X, y, label="Linear Relation")
    y_pred = model.theta[0] + model.theta[1] * np.array(X)
    ax.plot(X, y_pred)
    ax.grid(color="black", linestyle="--", linewidth=0.5)
    ax.legend(loc=2)
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; release
    # it once rendered so reruns do not accumulate open figures.
    plt.close(fig)
|