Spaces:
Runtime error
Runtime error
App.py
Browse files- app.py +89 -0
- blobs.csv +101 -0
- csv_create.py +21 -0
app.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from sklearn.datasets import make_blobs
|
5 |
+
from sklearn.cluster import KMeans
|
6 |
+
import pandas as pd
|
7 |
+
from csv_create import blob_data
|
8 |
+
import atexit
|
9 |
+
|
10 |
+
# Function to plot the Voronoi diagram
|
11 |
+
|
12 |
+
|
13 |
+
def plot_voronoi(X, kmeans, added_points):
    """Draw the KMeans decision regions, the data and the centroids in Streamlit.

    Args:
        X: 2-D array; column 0 and column 1 are the two plotted features.
        kmeans: Fitted estimator exposing ``predict`` and ``cluster_centers_``.
        added_points: Extra points to highlight with black crosses. May be
            ``None``, an empty sequence, a sequence of ``(1, 2)`` or ``(2,)``
            arrays, or a single ``(n, 2)`` array.
    """
    # Grid with 0.1 spacing covering the data extent padded by 1 on each side.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))

    # Cluster label of every grid node (the coloured regions) and of every
    # data point (the scatter colours).
    grid_labels = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
    grid_labels = grid_labels.reshape(xx.shape)
    point_labels = kmeans.predict(X)

    # Use an explicit figure rather than pyplot's implicit global one, so the
    # exact figure handed to Streamlit is known and can be released afterwards.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.contourf(xx, yy, grid_labels, alpha=0.4)
    ax.scatter(X[:, 0], X[:, 1], c=point_labels, alpha=0.8, edgecolors='k')
    centers = kmeans.cluster_centers_
    ax.scatter(centers[:, 0], centers[:, 1], marker='*', s=300, c='r')

    # len() instead of truthiness: bool() of a multi-element NumPy array raises.
    if added_points is not None and len(added_points) > 0:
        # Collapse any accepted input layout to one (n, 2) array.
        extra = np.asarray(added_points, dtype=float).reshape(-1, 2)
        ax.scatter(extra[:, 0], extra[:, 1], marker='x',
                   s=200, linewidths=3, color='black')

    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.set_title('KMeans Clustering')

    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; close this
    # one so repeated calls do not accumulate open figures.
    plt.close(fig)
|
41 |
+
|
42 |
+
|
43 |
+
def run_kmeans_app():
    """Streamlit page: show the clustered dataset and add random points on demand.

    Reads ``blobs.csv``, fits a 3-cluster KMeans on ``feature1``/``feature2``
    and plots the result. When the button is pressed, ``num_points`` random
    points are generated, appended to the in-memory dataset and to
    ``blobs.csv``, the model is refit, and the plot is redrawn with the new
    points highlighted.
    """
    csv_path = 'blobs.csv'

    # Load the dataset from disk; if the file is missing, regenerate it with
    # the same helper that is registered to rebuild it at interpreter exit.
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        blob_data()
        df = pd.read_csv(csv_path)
    X = df[['feature1', 'feature2']].values

    # Initialize KMeans with 3 clusters
    kmeans = KMeans(n_clusters=3, random_state=42).fit(X)

    st.title('KMeans Clustering')
    st.write('This app allows you to add random data points to an initial clustered dataset and visualize the changing predictions using a Voronoi diagram.')
    st.write('Initial dataset:')
    plot_voronoi(X, kmeans, added_points=[])

    num_points = 1

    if not st.button('Add random data point(s)'):
        return

    # Draw each new point uniformly between the smallest and largest value
    # found anywhere in X (one shared range for both features).
    low, high = X.min(), X.max()
    added_points = [
        np.random.uniform(low=low, high=high, size=(1, 2))
        for _ in range(num_points)
    ]
    new_points = np.vstack(added_points)

    # Extend the dataset and refit once with all new points included.
    X = np.vstack((X, new_points))
    kmeans = KMeans(n_clusters=3, random_state=42).fit(X)

    # Persist the new rows without a header so they extend the existing file.
    # NOTE(review): "label" here is the KMeans cluster id of the refit model;
    # whether that matches the meaning of the labels already in the file
    # should be confirmed.
    df_new = pd.DataFrame(new_points, columns=["feature1", "feature2"])
    df_new["label"] = kmeans.predict(new_points)
    df_new.to_csv(csv_path, mode="a", header=False, index=False)

    # Plot the updated Voronoi diagram with the new points marked.
    st.write(f'Updated dataset after adding {num_points} random data point(s):')
    plot_voronoi(X, kmeans, added_points)
|
84 |
+
|
85 |
+
|
86 |
+
if __name__ == '__main__':
    run_kmeans_app()

# Regenerate blobs.csv when the interpreter shuts down.
# NOTE(review): Streamlit re-executes the script on user interaction, so this
# line can run many times in one process. unregister() removes every earlier
# registration (and is a no-op if there is none), so exactly one handler is
# left instead of one per rerun.
atexit.unregister(blob_data)
atexit.register(blob_data)
|
blobs.csv
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
feature1,feature2,label
|
2 |
+
-7.726420909219675,-8.39495681796181,2
|
3 |
+
5.453396053597771,0.7423053675067768,1
|
4 |
+
-2.978672008987702,9.556846171784287,0
|
5 |
+
6.042673147164201,0.5713186211484511,1
|
6 |
+
-6.521839830802987,-6.319325066907712,2
|
7 |
+
3.649342511097413,1.40687195433796,1
|
8 |
+
-2.1779341916491863,9.98983125532068,0
|
9 |
+
4.4202069483905895,2.3302822554524782,1
|
10 |
+
4.736956385576142,2.9418146744736213,1
|
11 |
+
-3.6601912004750528,9.389984146543993,0
|
12 |
+
-3.053580347577933,9.125208717908187,0
|
13 |
+
-6.65216725654714,-5.572966838993518,2
|
14 |
+
-6.357685625534373,-6.583124920042761,2
|
15 |
+
-3.6155325970587784,7.818079504117651,0
|
16 |
+
-1.7707310430573397,9.185654409388292,0
|
17 |
+
-7.950519689212382,-6.397637178032761,2
|
18 |
+
-6.602936391821251,-6.052926344239923,2
|
19 |
+
-2.581207744633084,10.017819026090345,0
|
20 |
+
-7.763484627352403,-6.726384487330419,2
|
21 |
+
-6.406389566577725,-6.95293850593282,2
|
22 |
+
-2.9726153158652124,8.548556374628065,0
|
23 |
+
-6.956728900565374,-6.538957618459302,2
|
24 |
+
-7.326142143218291,-6.023710798952474,2
|
25 |
+
-2.147802017544336,10.55232269466429,0
|
26 |
+
-2.5450236621627016,10.578929784012328,0
|
27 |
+
-2.9698363940125376,10.071408354417237,0
|
28 |
+
3.224508094177687,1.552524361175373,1
|
29 |
+
-6.253959843386264,-7.737267149692229,2
|
30 |
+
-7.854308861378591,-6.093024989533495,2
|
31 |
+
-8.116577902029352,-8.200566206360223,2
|
32 |
+
-7.559651912729761,-6.647855896114943,2
|
33 |
+
4.935999113292677,2.234224956120621,1
|
34 |
+
4.447517871446979,2.2747170262743444,1
|
35 |
+
-5.721031612143866,-7.7007919116276575,2
|
36 |
+
-0.9299848075453587,9.78172085735123,0
|
37 |
+
-3.109836312971555,8.722592378405045,0
|
38 |
+
-2.4416694183648264,7.589537941984865,0
|
39 |
+
-2.185113653657955,8.629203847782005,0
|
40 |
+
5.5552809539301755,2.3019207936004165,1
|
41 |
+
4.731639612763604,-0.0143992306601608,1
|
42 |
+
-8.257296559108362,-7.8179346331910695,2
|
43 |
+
-2.9883718608980403,8.828627151534505,0
|
44 |
+
4.605167066522858,0.8044916463212,1
|
45 |
+
-3.837383671951181,9.211147364067445,0
|
46 |
+
-2.6248459054409907,8.713182432609033,0
|
47 |
+
3.5775751225019965,2.4467621145759137,1
|
48 |
+
-8.487110425712498,-6.695475734743642,2
|
49 |
+
-6.706446265300088,-6.49479221354711,2
|
50 |
+
-6.866625299273363,-5.42657551611863,2
|
51 |
+
3.8313852333349137,1.4714126403561956,1
|
52 |
+
2.020133732138357,2.795072188315956,1
|
53 |
+
4.644992292870563,1.7385825505655852,1
|
54 |
+
-1.6966718006585522,10.370526156769145,0
|
55 |
+
-6.619744396902846,-6.098286721498637,2
|
56 |
+
-6.05756703115678,-4.9833166106219995,2
|
57 |
+
-7.103089976477121,-6.1661090991838545,2
|
58 |
+
-3.5220287433871738,9.328533460793595,0
|
59 |
+
-2.267235351486716,7.101005883540523,0
|
60 |
+
6.117772880969618,1.4548994656670846,1
|
61 |
+
-4.234115455565783,8.45199859895735,0
|
62 |
+
-6.5865547178525885,-7.5944610113023145,2
|
63 |
+
3.937825742350749,1.6455075373429637,1
|
64 |
+
-7.125015307154141,-7.633845757633436,2
|
65 |
+
2.72110762092906,1.946655808491515,1
|
66 |
+
-7.1442840243892265,-4.159940426686328,2
|
67 |
+
-6.665533447021066,-8.125848371987935,2
|
68 |
+
4.7001090461691275,4.436411796426018,1
|
69 |
+
-7.769141620776793,-7.695919878241385,2
|
70 |
+
4.110118632461063,2.486437117054088,1
|
71 |
+
4.897429226950866,1.8987237681745648,1
|
72 |
+
4.297164319701332,1.170892414719113,1
|
73 |
+
-6.629134340805393,-6.533661383778971,2
|
74 |
+
-8.070930688353918,-6.2235559846421165,2
|
75 |
+
-2.1655793334842888,7.251245972835587,0
|
76 |
+
4.739530201315743,1.4696940298245327,1
|
77 |
+
-5.916251061906948,-6.467328666339449,2
|
78 |
+
5.430910783271148,1.063782229145993,1
|
79 |
+
-6.82141847270527,-8.023079891106569,2
|
80 |
+
6.526064737438632,2.147747496772571,1
|
81 |
+
3.0892154051619687,2.0417326587467595,1
|
82 |
+
-2.147561598005116,8.369166373593197,0
|
83 |
+
3.856625543891864,1.6511081677350563,1
|
84 |
+
-1.686652710949561,7.793442478227299,0
|
85 |
+
-5.013852680006513,-6.406276672364159,2
|
86 |
+
-2.5226948477906843,7.956575199242421,0
|
87 |
+
-2.300334028047995,7.054616004318546,0
|
88 |
+
-1.0435488541311961,8.788509827711787,0
|
89 |
+
3.720454601994298,3.5231040889582714,1
|
90 |
+
-3.9877196134201776,8.294441919803614,0
|
91 |
+
4.247770683095943,0.5096547358086134,1
|
92 |
+
4.726925904466273,1.6741623334748645,1
|
93 |
+
5.782701650743122,2.7251027166275064,1
|
94 |
+
-3.417221698573961,7.60198242686303,0
|
95 |
+
5.226735930028371,4.16362530975071,1
|
96 |
+
-3.110904235282147,10.86656431270726,0
|
97 |
+
-3.186119623358709,9.62596241703919,0
|
98 |
+
-1.4781981005567992,9.94556624731452,0
|
99 |
+
4.478593124562092,2.3772205407552702,1
|
100 |
+
-5.7965759479759935,-5.826307541241044,2
|
101 |
+
-3.348415146275389,8.705073752347108,0
|
csv_create.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sklearn.datasets import make_blobs
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
|
5 |
+
|
6 |
+
def blob_data(file_path="blobs.csv", n_samples=100, centers=3, random_state=42):
    """Generate a 2-feature blob dataset and write it to a CSV file.

    The file is written with a header row and the columns ``feature1``,
    ``feature2`` and ``label``. Any existing file at ``file_path`` is
    overwritten.

    Args:
        file_path: Destination CSV path.
        n_samples: Number of points passed to ``make_blobs``.
        centers: Number of blob centers passed to ``make_blobs``.
        random_state: Seed passed to ``make_blobs``.
    """
    # Generate first, write second: no explicit delete is needed because
    # to_csv replaces the file, and a failure here leaves the old file intact.
    X, y = make_blobs(n_samples=n_samples, centers=centers, n_features=2,
                      random_state=random_state)

    # Save data to a CSV file
    df = pd.DataFrame(X, columns=["feature1", "feature2"])
    df["label"] = y
    df.to_csv(file_path, index=False)
|