azizalto committed on
Commit
c5301d0
1 Parent(s): bb987c8
app.py ADDED
@@ -0,0 +1,45 @@
+ import streamlit as st
+
+ from page_config import APP_PAGE_HEADER
+ from ml_algorithms.linear_regression_gradient_descent import app as lrgd_app
+
+ APP_PAGE_HEADER()
+
+ with st.expander("Linear Regression using Gradient Descent"):
+     lrgd_app()
+
+
+ def app2():
+     import numpy as np
+     import matplotlib.pyplot as plt
+
+     st.write("*** Program Started ***")
+
+     # generate a noisy linear relation: per-point random slope m and intercept b
+     n = 50
+     x = np.arange(-n / 2, n / 2, 1, dtype=np.float64)
+
+     m = np.random.uniform(0.3, 0.5, (n,))
+     b = np.random.uniform(5, 10, (n,))
+
+     y = x * m + b
+     print("x", x, type(x[0]))
+     print("y", y, type(y[0]))
+
+     plt.scatter(
+         x,
+         y,
+         s=None,
+         marker="o",
+         color="g",
+         edgecolors="g",
+         alpha=0.9,
+         label="Linear Relation",
+     )
+     plt.grid(color="black", linestyle="--", linewidth=0.5)
+     plt.legend(loc=2)
+     plt.axis("scaled")
+     st.pyplot(plt.gcf())
+
+
+ # app2()
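Note: assuming the packages listed in requirements.txt are installed, this page would typically be launched with Streamlit's CLI, e.g. streamlit run app.py.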
ml_algorithms/k_means.py ADDED
@@ -0,0 +1,64 @@
+ # src: https://gist.github.com/iamaziz/ff570a6826b6d56c32b9d497a73e688c
+ # src: https://gist.github.com/iamaziz/0786e3de174c79839e42a5926f25acb2
+ def distance(u, v):
+     """
+     Calculates the Euclidean distance between two points
+     distance = square_root( sum( (u_i - v_i)^2 ) )
+
+     u: [float, float], point1
+     v: [float, float], point2
+     """
+     sum_ = sum((u[i] - v[i]) ** 2 for i in range(len(u)))
+     return sum_ ** (1 / 2)
+
+
+ def get_closer(target, *args):
+     """
+     Return the closest point (from the points in `args`) to `target`
+
+     target: [float], target point
+     *args: [[float]], list of points
+     """
+     min_distance = float("inf")
+     closer = target
+     for point in args:
+         d = distance(point, target)
+         if d < min_distance:
+             min_distance = d
+             closer = point
+     return closer
+
+
+ def get_center(cluster):
+     """
+     Calculates the centroid point of `cluster`
+
+     cluster: [[float]], list of the points in the cluster
+     """
+     center = []
+     n = len(cluster)
+     for i in range(len(cluster[0])):
+         c = sum(p[i] for p in cluster) / n
+         center.append(round(c, 1))
+     return center
+
+
+ def k_means(data, k=2, *centers):
+     """
+     Recursive k-means algorithm
+
+     data: [[float]], data points to consider for clustering
+     k: int, number of clusters
+     centers: [[float]], optional - initial centroids
+     """
+     centers = list(centers) if centers else [data[i] for i in range(k)]
+     clusters = [[] for _ in range(k)]
+
+     # assignment step: attach each point to its nearest centroid
+     for point in data:
+         nearest = get_closer(point, *centers)
+         nearest_cluster_index = centers.index(nearest)
+         clusters[nearest_cluster_index].append(point)
+
+     # update step: recompute centroids; stop when they no longer move
+     new_centers = [get_center(cluster) for cluster in clusters]
+     if centers == new_centers:
+         return clusters, centers
+     return k_means(data, k, *new_centers)
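For illustration, a minimal sketch of how the recursive k_means above could be exercised on toy 2-D points (the sample data and the choice of k=2 below are made up, not part of the commit):

from ml_algorithms.k_means import k_means

# illustrative toy data: two loose groups of 2-D points (made-up values)
data = [[1.0, 1.0], [1.5, 2.0], [1.2, 0.8], [8.0, 8.0], [8.5, 9.0], [7.8, 8.2]]

clusters, centers = k_means(data, 2)  # k=2; initial centroids are seeded from the first two points
print("centers:", centers)    # final centroid of each cluster (coordinates rounded to 1 decimal)
print("clusters:", clusters)  # the input points grouped by nearest centroid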
ml_algorithms/linear_regression_gradient_descent.py ADDED
@@ -0,0 +1,172 @@
+ # src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e
+
+
+ class LinearRegressionGradient:
+     def __init__(self, theta=None):
+         self.theta = theta
+         self.loss_ = float("inf")
+
+     def hypothesis(self, x):
+         return self.theta[0] + self.theta[1] * x
+
+     def loss(self, X, y):
+         m = len(X)
+         h = list(map(self.hypothesis, X))
+         return sum([(h[i] - y[i]) ** 2 for i in range(m)]) / (2 * m)
+
+     def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
+         m = len(X)
+
+         for j in range(num_iter):
+
+             # predict
+             h = list(map(self.hypothesis, X))
+
+             # compute slope, aka derivative with current params (theta)
+             deri_th0 = sum([(h[i] - y[i]) for i in range(m)]) / m
+             deri_th1 = sum([(h[i] - y[i]) * X[i] for i in range(m)]) / m
+
+             # update parameters (moving against the gradient 'derivative')
+             theta[0] = theta[0] - alpha * deri_th0
+             theta[1] = theta[1] - alpha * deri_th1
+
+             # report
+             if j % 200 == 0:
+                 self.loss_ = self.loss(X, y)
+                 msg = f"loss: {self.loss_}"
+                 print(msg)
+
+         # record the loss for the final parameters
+         self.loss_ = self.loss(X, y)
+
+
+ def app():
+     import streamlit as st
+
+     def header():
+         st.subheader("Linear Regression using Gradient Descent")
+         desc = """> Plain Python (vanilla version), i.e. without importing any library"""
+         st.markdown(desc)
+
+     header()
+
+     st1, st2 = st.columns(2)
+     with st1:
+         code_math()
+     with st2:
+         interactive_run()
+
+     st.markdown(
+         "> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
+     )
+
+
+ def code_math():
+     import inspect
+     import streamlit as st
+
+     tex = st.latex
+     write = st.write
+     mark = st.write
+     codify = lambda func: st.code(inspect.getsource(func), language="python")
+     cls = LinearRegressionGradient(theta=[0, 0])
+
+     write("The class")
+     codify(cls.__init__)
+
+     write("The Hypothesis")
+     tex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
+     codify(cls.hypothesis)
+     mark('The Loss/Objective/Cost function to "_minimize_"')
+     tex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
+     codify(cls.loss)
+     write("The Gradient Descent algorithm")
+     mark("> repeat until convergence {")
+     tex(
+         r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})"""
+     )
+     tex(
+         r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"""
+     )
+     mark("> }")
+     codify(cls.gradientDescent)
+
+
+ def interactive_run():
+     import streamlit as st
+     import numpy as np
+
+     mark = st.markdown
+     tex = st.latex
+
+     def random_data(n=10):
+         def sample_linear_regression_dataset(n):
+             # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
+             import numpy as np
+             from sklearn import datasets
+             import matplotlib.pyplot as plt  # for plotting
+
+             x, y, coef = datasets.make_regression(
+                 n_samples=n,  # number of samples
+                 n_features=1,  # number of features
+                 n_informative=1,  # number of useful features
+                 noise=40,  # standard deviation of the Gaussian noise
+                 coef=True,  # return the true coefficient used to generate the data
+                 random_state=0,  # set for the same data points on each run
+             )
+
+             # Scale feature x (years of experience) to range 0..20
+             # x = np.interp(x, (x.min(), x.max()), (0, 20))
+
+             # Scale target y (salary) to range 20000..150000
+             # y = np.interp(y, (y.min(), y.max()), (20000, 150000))
+
+             plt.ion()  # interactive plot on
+             plt.plot(x, y, ".", label="training data")
+             plt.xlabel("Years of experience")
+             plt.ylabel("Salary $")
+             plt.title("Experience Vs. Salary")
+
+             # flatten to 1-D arrays and round for display
+             X, y = x.reshape(-1), y.reshape(-1)
+             return np.around(X, 2), np.around(y, 2)
+
+         X_, y_ = sample_linear_regression_dataset(n)
+         return X_, y_
+
+     X, y = random_data()
+     theta = [0, 0]  # initial values
+     model = LinearRegressionGradient(theta)
+     n = st.slider("Number of samples", min_value=10, max_value=200, step=10)
+     if st.button("generate new data"):
+         X, y = random_data(n=n)
+     mark("_Input_")
+     mark(f"_X_ = {X}")
+     mark(f"_y_ = {y}")
+     model.gradientDescent(X, y, theta)  # run to optimize thetas
+     mark("_Solution_")
+     tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print solution
+     tex(f"loss = {model.loss_}")
+
+     # -- visualize
+     import matplotlib.pyplot as plt
+
+     fig, ax = plt.subplots()
+     ax.scatter(X, y, label="Linear Relation")
+     y_pred = theta[0] + theta[1] * np.array(X)
+     ax.plot(X, y_pred)
+     ax.grid(color="black", linestyle="--", linewidth=0.5)
+     ax.legend(loc=2)
+     # ax.axis("scaled")
+     st.pyplot(fig)
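For illustration, a minimal sketch of fitting LinearRegressionGradient directly, outside the Streamlit app (the toy data below is made up; it relies only on the class defined above):

from ml_algorithms.linear_regression_gradient_descent import LinearRegressionGradient

# illustrative data drawn from y = 2 + 3x, as plain Python lists
X = [0.0, 1.0, 2.0, 3.0, 4.0]
y = [2.0, 5.0, 8.0, 11.0, 14.0]

theta = [0, 0]                           # initial parameters (theta_0, theta_1)
model = LinearRegressionGradient(theta)  # the model keeps a reference to this same list
model.gradientDescent(X, y, theta)       # updates theta in place; prints the loss every 200 iterations
print(f"y = {theta[0]:.2f} + {theta[1]:.2f} x")  # should approach y = 2.00 + 3.00 x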
page_config.py ADDED
@@ -0,0 +1,27 @@
+ from datetime import date
+
+ import streamlit as st
+
+
+ def APP_PAGE_HEADER():
+     st.set_page_config(
+         page_title="ML Algorithms",
+         page_icon=":camel:",
+         layout="wide",
+         initial_sidebar_state="collapsed",
+     )
+
+     hide_style = """
+         <style>
+         #MainMenu {visibility: hidden;}
+         footer {visibility: hidden;}
+         </style>
+         """
+     st.markdown(hide_style, unsafe_allow_html=True)
+     HEADER()
+
+
+ def HEADER():
+     today = date.today()
+     st.header("_Simple ML Algorithms explained in Math & Code_")
+     st.write(str(today))
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit
+ matplotlib
+ scikit-learn