Spaces:

Skratch99
/

K-Means_Visualization

Runtime error

App Files Files Community

Skratch99 committed on Apr 25, 2023

Commit

ffb24ac

•

1 Parent(s): f8963f5

Final Update

Browse files

Files changed (1) hide show

app.py +62 -64

app.py CHANGED Viewed

@@ -3,19 +3,15 @@ import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.datasets import make_blobs
 from sklearn.cluster import KMeans
-import pandas as pd
-from csv_create import blob_data
-import atexit
-# Function to plot the Voronoi diagram
 def plot_voronoi(X, kmeans, added_points):
     # Create a meshgrid of points to plot the Voronoi diagram
     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
-                         np.arange(y_min, y_max, 0.1))
     # Predict the cluster labels for the meshgrid points
     Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
@@ -25,65 +21,67 @@ def plot_voronoi(X, kmeans, added_points):
     y_pred = kmeans.predict(X)
     # Plot the Voronoi diagram and the data points
-    plt.figure(figsize=(10, 8))
-    plt.contourf(xx, yy, Z, alpha=0.4)
-    plt.scatter(X[:, 0], X[:, 1], c=y_pred, alpha=0.8, edgecolors='k')
-    plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[
-                :, 1], marker='*', s=300, c='r')
     if added_points:
         for point in added_points:
-            plt.scatter(point[:, 0], point[:, 1], marker='x',
-                        s=200, linewidths=3, color='black')
-    plt.xlabel('Feature 1')
-    plt.ylabel('Feature 2')
-    plt.title('KMeans Clustering')
-    st.pyplot(plt)
-def run_kmeans_app():
-    # Create a clustered dataset using make_blobs
-    df = pd.read_csv('blobs.csv')
-    X = df[['feature1', 'feature2']].values
-    # Initialize KMeans with 3 clusters
-    kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
-    # Streamlit app to add random data points and visualize the changing predictions
-    st.title('KMeans Clustering')
-    st.write('This app allows you to add random data points to an initial clustered dataset and visualize the changing predictions using a Voronoi diagram.')
-    st.write('Initial dataset:')
-    plot_voronoi(X, kmeans, added_points=[])
-    # Add random data points to the initial dataset
-    num_points = 1
-    added_points = []
-    button = st.button(f'Add random data point(s)')
-    if button:
-        # Generate a random data point
-        for i in range(num_points):
-            new_point = np.random.uniform(
-                low=X.min(), high=X.max(), size=(1, 2))
-            # added_points.append(new_point)
-            # Add the random data point to the dataset
-            X = np.vstack((X, new_point))
-        # Update the KMeans model with the new data point
-        kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
-        df_new = pd.DataFrame(new_point, columns=["feature1", "feature2"])
-        df_new["label"] = kmeans.predict(new_point)
-        with open("blobs.csv", "a") as f:
-            df_new.to_csv(f, header=False, index=False)
-        # Plot the updated Voronoi diagram
-        st.write(f'Updated dataset after adding {i+1} random data point(s):')
-        plot_voronoi(X, kmeans, added_points)
-    # Concatenate the added points with the initial dataset
-    if added_points:
-        added_points = np.vstack(added_points)
-        X = np.vstack((X[:-len(added_points)], added_points))
-if __name__ == '__main__':
-    run_kmeans_app()
-atexit.register(blob_data)

 import matplotlib.pyplot as plt
 from sklearn.datasets import make_blobs
 from sklearn.cluster import KMeans
+from scipy.spatial import Voronoi, voronoi_plot_2d
 def plot_voronoi(X, kmeans, added_points):
     # Create a meshgrid of points to plot the Voronoi diagram
+    fig, ax = plt.subplots()
     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
+    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
     # Predict the cluster labels for the meshgrid points
     Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
     y_pred = kmeans.predict(X)
     # Plot the Voronoi diagram and the data points
+    # ax.figure(figsize=(10, 8))
+    ax.contourf(xx, yy, Z, alpha=0.4)
+    ax.scatter(X[:, 0], X[:, 1], c=y_pred, alpha=0.8, edgecolors='k')
+    # ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='r')
     if added_points:
         for point in added_points:
+            plt.scatter(point[:, 0], point[:, 1], s=5, linewidths=3, color='red')
+    ax.set_xlabel('Feature 1')
+    ax.set_ylabel('Feature 2')
+    ax.set_title('KMeans Clustering')
+    ax.legend()
+    st.set_option('deprecation.showPyplotGlobalUse', False)
+    st.pyplot(fig)
+# Set up the Streamlit app layout
+# st.set_page_config(page_title='K Means Clustering')
+st.title('K Means Clustering')
+st.write("---")
+st.sidebar.write("Create random dataset")
+# Create a random dataset
+n_samples = st.sidebar.number_input('Number of samples', min_value=100, max_value=1000, value=200)
+n_centers = st.sidebar.number_input('Number of centers', min_value=1, max_value=20, value=3)
+# Streamlit app to add random data points and visualize the changing predictions
+st.write('This app allows you to add random data points to an initial clustered dataset and visualize the changing predictions using a Voronoi diagram.')
+if st.session_state.get('X') is None:
+    st.session_state.n_samples = n_samples
+    st.session_state.n_centers = n_centers
+    st.session_state.X, st.session_state.y = make_blobs(n_samples=n_samples, centers=n_centers, n_features=2, random_state=42)
+    st.session_state.kmeans = KMeans(n_clusters=n_centers, random_state=42)
+    st.session_state.kmeans.fit(st.session_state.X)
+if st.session_state.n_samples != n_samples or st.session_state.n_centers != n_centers:
+    st.session_state.n_samples = n_samples
+    st.session_state.n_centers = n_centers
+    st.session_state.X, st.session_state.y = make_blobs(n_samples=n_samples, centers=n_centers, n_features=2, random_state=42)
+    st.session_state.kmeans = KMeans(n_clusters=n_centers, random_state=42)
+    st.session_state.kmeans.fit(st.session_state.X)
+n_points = st.sidebar.number_input('Number of points to add', min_value=0, max_value=100, value=15)
+added_points = []
+X = st.session_state.X
+kmeans = st.session_state.kmeans
+st.button('Add data points')
+if st.button:
+    for i in range(n_points):
+        new_point = np.random.uniform(low=X.min(), high=X.max(), size=(1, 2))
+        added_points.append(new_point)
+        # Add the random data point to the dataset
+        X = np.vstack((X, new_point))
+    # Fit KMeans to the new dataset
+    kmeans = KMeans(n_clusters=n_centers, random_state=42).fit(X)
+    plot_voronoi(X, kmeans, added_points)
+    st.session_state.X = X
+    st.session_state.kmeans = kmeans