azizalto committed on
Commit
c5301d0
1 Parent(s): bb987c8
app.py ADDED
@@ -0,0 +1,45 @@
+ import streamlit as st
+
+ from page_config import APP_PAGE_HEADER
+ from ml_algorithms.linear_regression_gradient_descent import app as lrgd_app
+
+ APP_PAGE_HEADER()
+
+ with st.expander("Linear Regression using Gradient Descent"):
+     lrgd_app()
+
+
+ def app2():
+     import numpy as np
+     import matplotlib.pyplot as plt
+
+     st.write("*** Program Started ***")
+
+     # generate a noisy linear relation: per-point random slope m and intercept b
+     n = 50
+     x = np.arange(-n / 2, n / 2, 1, dtype=np.float64)
+
+     m = np.random.uniform(0.3, 0.5, (n,))
+     b = np.random.uniform(5, 10, (n,))
+
+     y = x * m + b
+     print("x", x, type(x[0]))
+     print("y", y, type(y[0]))
+
+     plt.scatter(
+         x,
+         y,
+         s=None,
+         marker="o",
+         color="g",
+         edgecolors="g",
+         alpha=0.9,
+         label="Linear Relation",
+     )
+     plt.grid(color="black", linestyle="--", linewidth=0.5)
+     plt.legend(loc=2)
+     plt.axis("scaled")
+     st.pyplot(plt.gcf())
+
+
+ # app2()
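Note: assuming the packages listed in requirements.txt are installed, this page would typically be launched with Streamlit's CLI, e.g. streamlit run app.py.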
ml_algorithms/k_means.py ADDED
@@ -0,0 +1,64 @@
+ # src: https://gist.github.com/iamaziz/ff570a6826b6d56c32b9d497a73e688c
+ # src: https://gist.github.com/iamaziz/0786e3de174c79839e42a5926f25acb2
+ def distance(u, v):
+     """
+     Calculates the Euclidean distance between two points
+     distance = square_root( sum( (u_i - v_i)^2 ) )
+
+     u: [float, float], point1
+     v: [float, float], point2
+     """
+     sum_ = sum((u[i] - v[i]) ** 2 for i in range(len(u)))
+     return sum_ ** (1 / 2)
+
+
+ def get_closer(target, *args):
+     """
+     Return the closest point (from the points in `args`) to `target`
+
+     target: [float], target point
+     *args: [[float]], list of points
+     """
+     min_distance = float("inf")
+     closer = target
+     for point in args:
+         d = distance(point, target)
+         if d < min_distance:
+             min_distance = d
+             closer = point
+     return closer
+
+
+ def get_center(cluster):
+     """
+     Calculates the centroid point of `cluster`
+
+     cluster: [[float]], list of the points in the cluster
+     """
+     center = []
+     n = len(cluster)
+     for i in range(len(cluster[0])):
+         c = sum(p[i] for p in cluster) / n
+         center.append(round(c, 1))
+     return center
+
+
+ def k_means(data, k=2, *centers):
+     """
+     Recursive k-means algorithm
+
+     data: [[float]], data points to consider for clustering
+     k: int, number of clusters
+     centers: [[float]], optional - initial centroids
+     """
+     centers = list(centers) if centers else [data[i] for i in range(k)]
+     clusters = [[] for _ in range(k)]
+
+     # assignment step: attach each point to its nearest centroid
+     for point in data:
+         nearest = get_closer(point, *centers)
+         nearest_cluster_index = centers.index(nearest)
+         clusters[nearest_cluster_index].append(point)
+
+     # update step: recompute centroids; stop when they no longer move
+     new_centers = [get_center(cluster) for cluster in clusters]
+     if centers == new_centers:
+         return clusters, centers
+     return k_means(data, k, *new_centers)
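For illustration, a minimal sketch of how the recursive k_means above could be exercised on toy 2-D points (the sample data and the choice of k=2 below are made up, not part of the commit):

from ml_algorithms.k_means import k_means

# illustrative toy data: two loose groups of 2-D points (made-up values)
data = [[1.0, 1.0], [1.5, 2.0], [1.2, 0.8], [8.0, 8.0], [8.5, 9.0], [7.8, 8.2]]

clusters, centers = k_means(data, 2)  # k=2; initial centroids are seeded from the first two points
print("centers:", centers)    # final centroid of each cluster (coordinates rounded to 1 decimal)
print("clusters:", clusters)  # the input points grouped by nearest centroid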
ml_algorithms/linear_regression_gradient_descent.py ADDED
@@ -0,0 +1,172 @@
+ # src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e
+
+
+ class LinearRegressionGradient:
+     def __init__(self, theta=None):
+         self.theta = theta
+         self.loss_ = float("inf")
+
+     def hypothesis(self, x):
+         return self.theta[0] + self.theta[1] * x
+
+     def loss(self, X, y):
+         m = len(X)
+         h = list(map(self.hypothesis, X))
+         return sum([(h[i] - y[i]) ** 2 for i in range(m)]) / (2 * m)
+
+     def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
+         m = len(X)
+
+         for j in range(num_iter):
+
+             # predict
+             h = list(map(self.hypothesis, X))
+
+             # compute slope, aka derivative with current params (theta)
+             deri_th0 = sum([(h[i] - y[i]) for i in range(m)]) / m
+             deri_th1 = sum([(h[i] - y[i]) * X[i] for i in range(m)]) / m
+
+             # update parameters (moving against the gradient 'derivative')
+             theta[0] = theta[0] - alpha * deri_th0
+             theta[1] = theta[1] - alpha * deri_th1
+
+             # report
+             if j % 200 == 0:
+                 self.loss_ = self.loss(X, y)
+                 msg = f"loss: {self.loss_}"
+                 print(msg)
+
+         # record the loss for the final parameters
+         self.loss_ = self.loss(X, y)
+
+
+ def app():
+     import streamlit as st
+
+     def header():
+         st.subheader("Linear Regression using Gradient Descent")
+         desc = """> Plain Python (vanilla version), i.e. without importing any library"""
+         st.markdown(desc)
+
+     header()
+
+     st1, st2 = st.columns(2)
+     with st1:
+         code_math()
+     with st2:
+         interactive_run()
+
+     st.markdown(
+         "> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
+     )
+
+
+ def code_math():
+     import inspect
+     import streamlit as st
+
+     tex = st.latex
+     write = st.write
+     mark = st.write
+     codify = lambda func: st.code(inspect.getsource(func), language="python")
+     cls = LinearRegressionGradient(theta=[0, 0])
+
+     write("The class")
+     codify(cls.__init__)
+
+     write("The Hypothesis")
+     tex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
+     codify(cls.hypothesis)
+     mark('The Loss/Objective/Cost function to "_minimize_"')
+     tex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
+     codify(cls.loss)
+     write("The Gradient Descent algorithm")
+     mark("> repeat until convergence {")
+     tex(
+         r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})"""
+     )
+     tex(
+         r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"""
+     )
+     mark("> }")
+     codify(cls.gradientDescent)
+
+
+ def interactive_run():
+     import streamlit as st
+     import numpy as np
+
+     mark = st.markdown
+     tex = st.latex
+
+     def random_data(n=10):
+         def sample_linear_regression_dataset(n):
+             # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
+             import numpy as np
+             from sklearn import datasets
+             import matplotlib.pyplot as plt  # for plotting
+
+             x, y, coef = datasets.make_regression(
+                 n_samples=n,  # number of samples
+                 n_features=1,  # number of features
+                 n_informative=1,  # number of useful features
+                 noise=40,  # standard deviation of the Gaussian noise
+                 coef=True,  # return the true coefficient used to generate the data
+                 random_state=0,  # set for the same data points on each run
+             )
+
+             # Scale feature x (years of experience) to range 0..20
+             # x = np.interp(x, (x.min(), x.max()), (0, 20))
+
+             # Scale target y (salary) to range 20000..150000
+             # y = np.interp(y, (y.min(), y.max()), (20000, 150000))
+
+             plt.ion()  # interactive plot on
+             plt.plot(x, y, ".", label="training data")
+             plt.xlabel("Years of experience")
+             plt.ylabel("Salary $")
+             plt.title("Experience Vs. Salary")
+
+             # flatten to 1-D arrays and round for display
+             X, y = x.reshape(-1), y.reshape(-1)
+             return np.around(X, 2), np.around(y, 2)
+
+         X_, y_ = sample_linear_regression_dataset(n)
+         return X_, y_
+
+     X, y = random_data()
+     theta = [0, 0]  # initial values
+     model = LinearRegressionGradient(theta)
+     n = st.slider("Number of samples", min_value=10, max_value=200, step=10)
+     if st.button("generate new data"):
+         X, y = random_data(n=n)
+     mark("_Input_")
+     mark(f"_X_ = {X}")
+     mark(f"_y_ = {y}")
+     model.gradientDescent(X, y, theta)  # run to optimize thetas
+     mark("_Solution_")
+     tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print solution
+     tex(f"loss = {model.loss_}")
+
+     # -- visualize
+     import matplotlib.pyplot as plt
+
+     fig, ax = plt.subplots()
+     ax.scatter(X, y, label="Linear Relation")
+     y_pred = theta[0] + theta[1] * np.array(X)
+     ax.plot(X, y_pred)
+     ax.grid(color="black", linestyle="--", linewidth=0.5)
+     ax.legend(loc=2)
+     # ax.axis("scaled")
+     st.pyplot(fig)
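For illustration, a minimal sketch of fitting LinearRegressionGradient directly, outside the Streamlit app (the toy data below is made up; it relies only on the class defined above):

from ml_algorithms.linear_regression_gradient_descent import LinearRegressionGradient

# illustrative data drawn from y = 2 + 3x, as plain Python lists
X = [0.0, 1.0, 2.0, 3.0, 4.0]
y = [2.0, 5.0, 8.0, 11.0, 14.0]

theta = [0, 0]                           # initial parameters (theta_0, theta_1)
model = LinearRegressionGradient(theta)  # the model keeps a reference to this same list
model.gradientDescent(X, y, theta)       # updates theta in place; prints the loss every 200 iterations
print(f"y = {theta[0]:.2f} + {theta[1]:.2f} x")  # should approach y = 2.00 + 3.00 x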
page_config.py ADDED
@@ -0,0 +1,27 @@
+ from datetime import date
+
+ import streamlit as st
+
+
+ def APP_PAGE_HEADER():
+     st.set_page_config(
+         page_title="ML Algorithms",
+         page_icon=":camel:",
+         layout="wide",
+         initial_sidebar_state="collapsed",
+     )
+
+     hide_style = """
+         <style>
+         #MainMenu {visibility: hidden;}
+         footer {visibility: hidden;}
+         </style>
+         """
+     st.markdown(hide_style, unsafe_allow_html=True)
+     HEADER()
+
+
+ def HEADER():
+     today = date.today()
+     st.header("_Simple ML Algorithms explained in Math & Code_")
+     st.write(str(today))
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit
+ matplotlib
+ scikit-learn