Skratch99 committed on
Commit
167dce0
1 Parent(s): eb62a34
Files changed (3) hide show
  1. app.py +89 -0
  2. blobs.csv +101 -0
  3. csv_create.py +21 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from sklearn.datasets import make_blobs
5
+ from sklearn.cluster import KMeans
6
+ import pandas as pd
7
+ from csv_create import blob_data
8
+ import atexit
9
+
10
+ # Function to plot the Voronoi diagram
11
+
12
+
13
def plot_voronoi(X, kmeans, added_points):
    """Draw the KMeans decision regions, data points and centroids in Streamlit.

    Args:
        X: 2-D array; columns 0 and 1 are used as the plotted features.
        kmeans: Fitted estimator exposing ``predict`` and
            ``cluster_centers_``.
        added_points: Points to highlight with a black cross. Accepts a list
            of ``(1, 2)`` arrays, a single ``(n, 2)`` array, an empty
            sequence, or ``None``.
    """
    # Pad the bounding box by one unit so points on the edge stay visible.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))

    # Label every grid node; the filled contour of those labels shows the
    # region of the plane assigned to each centroid.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Cluster labels of the data points, used to colour the scatter plot.
    y_pred = kmeans.predict(X)

    # Work on an explicit figure instead of pyplot's implicit global one, so
    # the exact figure can be handed to Streamlit and released afterwards.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.contourf(xx, yy, Z, alpha=0.4)
    ax.scatter(X[:, 0], X[:, 1], c=y_pred, alpha=0.8, edgecolors='k')

    centers = kmeans.cluster_centers_
    ax.scatter(centers[:, 0], centers[:, 1], marker='*', s=300, c='r')

    # Test the length rather than truthiness: the truth value of a NumPy
    # array with more than one element is ambiguous and raises ValueError.
    if added_points is not None and len(added_points) > 0:
        for point in added_points:
            # Rows of an (n, 2) array arrive as shape (2,); promote them so
            # the column indexing below works for both input layouts.
            point = np.atleast_2d(point)
            ax.scatter(point[:, 0], point[:, 1], marker='x',
                       s=200, linewidths=3, color='black')

    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.set_title('KMeans Clustering')

    st.pyplot(fig)
    # Every call creates a new figure; close it so figures do not pile up
    # in pyplot's registry across Streamlit reruns.
    plt.close(fig)
41
+
42
+
43
def run_kmeans_app():
    """Run the Streamlit page for the KMeans demo.

    Loads the dataset from ``blobs.csv``, fits a 3-cluster KMeans model and
    plots it. When the button is pressed, random points are drawn inside the
    per-feature bounding box of the data, appended to the dataset and to
    ``blobs.csv``, the model is refitted, and the updated plot is shown with
    the new points highlighted.
    """
    csv_path = 'blobs.csv'
    n_clusters = 3

    # Load the dataset; only the two feature columns are used for clustering.
    df = pd.read_csv(csv_path)
    X = df[['feature1', 'feature2']].values

    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X)

    st.title('KMeans Clustering')
    st.write('This app allows you to add random data points to an initial clustered dataset and visualize the changing predictions using a Voronoi diagram.')
    st.write('Initial dataset:')
    plot_voronoi(X, kmeans, added_points=[])

    # Number of random points generated per button press.
    num_points = 1
    added_points = []

    if st.button('Add random data point(s)'):
        # Sample each feature within its own observed range. Using the global
        # min/max of X would let a point fall outside the data's bounding box
        # along the narrower feature.
        low, high = X.min(axis=0), X.max(axis=0)

        for _ in range(num_points):
            new_point = np.random.uniform(low=low, high=high, size=(1, 2))
            # Remember the point so the plot can mark it with a cross.
            added_points.append(new_point)
            X = np.vstack((X, new_point))

            # Refit on the enlarged dataset before labelling the new point.
            kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X)

            df_new = pd.DataFrame(new_point, columns=["feature1", "feature2"])
            # NOTE(review): this stores the KMeans cluster index; confirm it
            # is meant to share the "label" column with the existing rows.
            df_new["label"] = kmeans.predict(new_point)
            # Let pandas open the file itself so it controls newline
            # handling for the appended row.
            df_new.to_csv(csv_path, mode="a", header=False, index=False)

        st.write(f'Updated dataset after adding {len(added_points)} random data point(s):')
        plot_voronoi(X, kmeans, added_points)
84
+
85
+
86
if __name__ == '__main__':
    run_kmeans_app()

# Regenerate blobs.csv when the interpreter exits, discarding the points
# appended during the session. This file may be executed many times inside
# one long-lived process (presumably once per Streamlit rerun - confirm), so
# drop any earlier registration first to keep blob_data from being queued
# more than once. unregister() does nothing if it was never registered.
atexit.unregister(blob_data)
atexit.register(blob_data)
blobs.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature1,feature2,label
2
+ -7.726420909219675,-8.39495681796181,2
3
+ 5.453396053597771,0.7423053675067768,1
4
+ -2.978672008987702,9.556846171784287,0
5
+ 6.042673147164201,0.5713186211484511,1
6
+ -6.521839830802987,-6.319325066907712,2
7
+ 3.649342511097413,1.40687195433796,1
8
+ -2.1779341916491863,9.98983125532068,0
9
+ 4.4202069483905895,2.3302822554524782,1
10
+ 4.736956385576142,2.9418146744736213,1
11
+ -3.6601912004750528,9.389984146543993,0
12
+ -3.053580347577933,9.125208717908187,0
13
+ -6.65216725654714,-5.572966838993518,2
14
+ -6.357685625534373,-6.583124920042761,2
15
+ -3.6155325970587784,7.818079504117651,0
16
+ -1.7707310430573397,9.185654409388292,0
17
+ -7.950519689212382,-6.397637178032761,2
18
+ -6.602936391821251,-6.052926344239923,2
19
+ -2.581207744633084,10.017819026090345,0
20
+ -7.763484627352403,-6.726384487330419,2
21
+ -6.406389566577725,-6.95293850593282,2
22
+ -2.9726153158652124,8.548556374628065,0
23
+ -6.956728900565374,-6.538957618459302,2
24
+ -7.326142143218291,-6.023710798952474,2
25
+ -2.147802017544336,10.55232269466429,0
26
+ -2.5450236621627016,10.578929784012328,0
27
+ -2.9698363940125376,10.071408354417237,0
28
+ 3.224508094177687,1.552524361175373,1
29
+ -6.253959843386264,-7.737267149692229,2
30
+ -7.854308861378591,-6.093024989533495,2
31
+ -8.116577902029352,-8.200566206360223,2
32
+ -7.559651912729761,-6.647855896114943,2
33
+ 4.935999113292677,2.234224956120621,1
34
+ 4.447517871446979,2.2747170262743444,1
35
+ -5.721031612143866,-7.7007919116276575,2
36
+ -0.9299848075453587,9.78172085735123,0
37
+ -3.109836312971555,8.722592378405045,0
38
+ -2.4416694183648264,7.589537941984865,0
39
+ -2.185113653657955,8.629203847782005,0
40
+ 5.5552809539301755,2.3019207936004165,1
41
+ 4.731639612763604,-0.0143992306601608,1
42
+ -8.257296559108362,-7.8179346331910695,2
43
+ -2.9883718608980403,8.828627151534505,0
44
+ 4.605167066522858,0.8044916463212,1
45
+ -3.837383671951181,9.211147364067445,0
46
+ -2.6248459054409907,8.713182432609033,0
47
+ 3.5775751225019965,2.4467621145759137,1
48
+ -8.487110425712498,-6.695475734743642,2
49
+ -6.706446265300088,-6.49479221354711,2
50
+ -6.866625299273363,-5.42657551611863,2
51
+ 3.8313852333349137,1.4714126403561956,1
52
+ 2.020133732138357,2.795072188315956,1
53
+ 4.644992292870563,1.7385825505655852,1
54
+ -1.6966718006585522,10.370526156769145,0
55
+ -6.619744396902846,-6.098286721498637,2
56
+ -6.05756703115678,-4.9833166106219995,2
57
+ -7.103089976477121,-6.1661090991838545,2
58
+ -3.5220287433871738,9.328533460793595,0
59
+ -2.267235351486716,7.101005883540523,0
60
+ 6.117772880969618,1.4548994656670846,1
61
+ -4.234115455565783,8.45199859895735,0
62
+ -6.5865547178525885,-7.5944610113023145,2
63
+ 3.937825742350749,1.6455075373429637,1
64
+ -7.125015307154141,-7.633845757633436,2
65
+ 2.72110762092906,1.946655808491515,1
66
+ -7.1442840243892265,-4.159940426686328,2
67
+ -6.665533447021066,-8.125848371987935,2
68
+ 4.7001090461691275,4.436411796426018,1
69
+ -7.769141620776793,-7.695919878241385,2
70
+ 4.110118632461063,2.486437117054088,1
71
+ 4.897429226950866,1.8987237681745648,1
72
+ 4.297164319701332,1.170892414719113,1
73
+ -6.629134340805393,-6.533661383778971,2
74
+ -8.070930688353918,-6.2235559846421165,2
75
+ -2.1655793334842888,7.251245972835587,0
76
+ 4.739530201315743,1.4696940298245327,1
77
+ -5.916251061906948,-6.467328666339449,2
78
+ 5.430910783271148,1.063782229145993,1
79
+ -6.82141847270527,-8.023079891106569,2
80
+ 6.526064737438632,2.147747496772571,1
81
+ 3.0892154051619687,2.0417326587467595,1
82
+ -2.147561598005116,8.369166373593197,0
83
+ 3.856625543891864,1.6511081677350563,1
84
+ -1.686652710949561,7.793442478227299,0
85
+ -5.013852680006513,-6.406276672364159,2
86
+ -2.5226948477906843,7.956575199242421,0
87
+ -2.300334028047995,7.054616004318546,0
88
+ -1.0435488541311961,8.788509827711787,0
89
+ 3.720454601994298,3.5231040889582714,1
90
+ -3.9877196134201776,8.294441919803614,0
91
+ 4.247770683095943,0.5096547358086134,1
92
+ 4.726925904466273,1.6741623334748645,1
93
+ 5.782701650743122,2.7251027166275064,1
94
+ -3.417221698573961,7.60198242686303,0
95
+ 5.226735930028371,4.16362530975071,1
96
+ -3.110904235282147,10.86656431270726,0
97
+ -3.186119623358709,9.62596241703919,0
98
+ -1.4781981005567992,9.94556624731452,0
99
+ 4.478593124562092,2.3772205407552702,1
100
+ -5.7965759479759935,-5.826307541241044,2
101
+ -3.348415146275389,8.705073752347108,0
csv_create.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.datasets import make_blobs
2
+ import pandas as pd
3
+ import os
4
+
5
+
6
def blob_data(file_path="blobs.csv"):
    """Write a fresh 100-point, 3-cluster blob dataset to a CSV file.

    The data is generated with a fixed ``random_state`` so every call
    produces the same rows. Any existing file at ``file_path`` is replaced.

    Args:
        file_path: Destination CSV path. Defaults to ``"blobs.csv"``.
    """
    # Generate the data before touching the file, so a failure here leaves
    # the existing CSV in place.
    X, y = make_blobs(n_samples=100, centers=3, n_features=2, random_state=42)

    df = pd.DataFrame(X, columns=["feature1", "feature2"])
    df["label"] = y

    # to_csv's default write mode truncates an existing file, so a separate
    # exists-check and delete step is unnecessary.
    df.to_csv(file_path, index=False)