Skratch99 committed on
Commit
ffb24ac
1 Parent(s): f8963f5

Final Update

Browse files
Files changed (1) hide show
  1. app.py +62 -64
app.py CHANGED
@@ -3,19 +3,15 @@ import numpy as np
3
  import matplotlib.pyplot as plt
4
  from sklearn.datasets import make_blobs
5
  from sklearn.cluster import KMeans
6
- import pandas as pd
7
- from csv_create import blob_data
8
- import atexit
9
-
10
- # Function to plot the Voronoi diagram
11
 
12
 
13
  def plot_voronoi(X, kmeans, added_points):
14
  # Create a meshgrid of points to plot the Voronoi diagram
 
15
  x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
16
  y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
17
- xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
18
- np.arange(y_min, y_max, 0.1))
19
 
20
  # Predict the cluster labels for the meshgrid points
21
  Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
@@ -25,65 +21,67 @@ def plot_voronoi(X, kmeans, added_points):
25
  y_pred = kmeans.predict(X)
26
 
27
  # Plot the Voronoi diagram and the data points
28
- plt.figure(figsize=(10, 8))
29
- plt.contourf(xx, yy, Z, alpha=0.4)
30
- plt.scatter(X[:, 0], X[:, 1], c=y_pred, alpha=0.8, edgecolors='k')
31
- plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[
32
- :, 1], marker='*', s=300, c='r')
33
  if added_points:
34
  for point in added_points:
35
- plt.scatter(point[:, 0], point[:, 1], marker='x',
36
- s=200, linewidths=3, color='black')
37
- plt.xlabel('Feature 1')
38
- plt.ylabel('Feature 2')
39
- plt.title('KMeans Clustering')
40
- st.pyplot(plt)
41
-
42
-
43
- def run_kmeans_app():
44
- # Create a clustered dataset using make_blobs
45
- df = pd.read_csv('blobs.csv')
46
- X = df[['feature1', 'feature2']].values
47
-
48
- # Initialize KMeans with 3 clusters
49
- kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
50
-
51
- # Streamlit app to add random data points and visualize the changing predictions
52
- st.title('KMeans Clustering')
53
- st.write('This app allows you to add random data points to an initial clustered dataset and visualize the changing predictions using a Voronoi diagram.')
54
- st.write('Initial dataset:')
55
- plot_voronoi(X, kmeans, added_points=[])
56
-
57
- # Add random data points to the initial dataset
58
- num_points = 1
59
- added_points = []
60
-
61
- button = st.button(f'Add random data point(s)')
62
- if button:
63
- # Generate a random data point
64
- for i in range(num_points):
65
- new_point = np.random.uniform(
66
- low=X.min(), high=X.max(), size=(1, 2))
67
- # added_points.append(new_point)
68
- # Add the random data point to the dataset
69
- X = np.vstack((X, new_point))
70
- # Update the KMeans model with the new data point
71
- kmeans = KMeans(n_clusters=3, random_state=42).fit(X)
72
- df_new = pd.DataFrame(new_point, columns=["feature1", "feature2"])
73
- df_new["label"] = kmeans.predict(new_point)
74
- with open("blobs.csv", "a") as f:
75
- df_new.to_csv(f, header=False, index=False)
76
- # Plot the updated Voronoi diagram
77
- st.write(f'Updated dataset after adding {i+1} random data point(s):')
78
- plot_voronoi(X, kmeans, added_points)
79
-
80
- # Concatenate the added points with the initial dataset
81
- if added_points:
82
- added_points = np.vstack(added_points)
83
- X = np.vstack((X[:-len(added_points)], added_points))
 
 
 
 
 
 
84
 
85
 
86
- if __name__ == '__main__':
87
- run_kmeans_app()
88
 
89
- atexit.register(blob_data)
 
3
  import matplotlib.pyplot as plt
4
  from sklearn.datasets import make_blobs
5
  from sklearn.cluster import KMeans
6
+ from scipy.spatial import Voronoi, voronoi_plot_2d
 
 
 
 
7
 
8
 
9
  def plot_voronoi(X, kmeans, added_points):
10
  # Create a meshgrid of points to plot the Voronoi diagram
11
+ fig, ax = plt.subplots()
12
  x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
13
  y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
14
+ xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
 
15
 
16
  # Predict the cluster labels for the meshgrid points
17
  Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
 
21
  y_pred = kmeans.predict(X)
22
 
23
  # Plot the Voronoi diagram and the data points
24
+ # ax.figure(figsize=(10, 8))
25
+ ax.contourf(xx, yy, Z, alpha=0.4)
26
+ ax.scatter(X[:, 0], X[:, 1], c=y_pred, alpha=0.8, edgecolors='k')
27
+ # ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='r')
 
28
  if added_points:
29
  for point in added_points:
30
+ plt.scatter(point[:, 0], point[:, 1], s=5, linewidths=3, color='red')
31
+ ax.set_xlabel('Feature 1')
32
+ ax.set_ylabel('Feature 2')
33
+ ax.set_title('KMeans Clustering')
34
+ ax.legend()
35
+ st.set_option('deprecation.showPyplotGlobalUse', False)
36
+ st.pyplot(fig)
37
+
38
+
# ---------------------------------------------------------------------------
# Streamlit page: build a random blob dataset, cluster it with KMeans, and let
# the user append random points and watch the cluster regions change.
# Streamlit re-executes this script top to bottom on every widget interaction,
# so anything that must survive a rerun is kept in ``st.session_state``.
# ---------------------------------------------------------------------------
st.title('K Means Clustering')
st.write("---")

# Sidebar controls that define the generated dataset.
st.sidebar.write("Create random dataset")
n_samples = st.sidebar.number_input(
    'Number of samples', min_value=100, max_value=1000, value=200)
n_centers = st.sidebar.number_input(
    'Number of centers', min_value=1, max_value=20, value=3)

st.write('This app allows you to add random data points to an initial clustered dataset and visualize the changing predictions using a Voronoi diagram.')

# (Re)generate the dataset and refit the model on the first run, or whenever
# the user changes the sample/center counts. Previously this logic was
# duplicated in two separate ``if`` blocks.
needs_reset = (
    st.session_state.get('X') is None
    or st.session_state.get('n_samples') != n_samples
    or st.session_state.get('n_centers') != n_centers
)
if needs_reset:
    st.session_state.n_samples = n_samples
    st.session_state.n_centers = n_centers
    st.session_state.X, st.session_state.y = make_blobs(
        n_samples=n_samples, centers=n_centers, n_features=2, random_state=42)
    st.session_state.kmeans = KMeans(n_clusters=n_centers, random_state=42)
    st.session_state.kmeans.fit(st.session_state.X)

n_points = st.sidebar.number_input(
    'Number of points to add', min_value=0, max_value=100, value=15)
added_points = []
X = st.session_state.X
kmeans = st.session_state.kmeans

# ``st.button`` returns True only on the rerun triggered by the click. The
# earlier code tested the function object itself (``if st.button:``), which is
# always truthy, so new points were appended on every rerun.
if st.button('Add data points') and n_points > 0:
    # Draw all new points in one call; bounds are the overall min/max of the
    # current data, matching the original sampling range.
    new_points = np.random.uniform(
        low=X.min(), high=X.max(), size=(n_points, 2))
    added_points.append(new_points)
    X = np.vstack((X, new_points))

    # Refit on the enlarged dataset and persist it for subsequent reruns.
    # NOTE(review): st.session_state.y is not extended here, so it no longer
    # lines up with X after points are added — confirm nothing relies on it.
    kmeans = KMeans(n_clusters=n_centers, random_state=42).fit(X)
    st.session_state.X = X
    st.session_state.kmeans = kmeans

# Always render the current state so the initial dataset is visible before
# any click and the latest fit is shown after one.
plot_voronoi(X, kmeans, added_points)
85
 
86
 
 
 
87