mervenoyan committed on
Commit
75df68a
1 Parent(s): 4bfd1a6

initial commit

Browse files
Files changed (2) hide show
  1. app.py +169 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ import warnings
4
+
5
+ from functools import partial
6
+ import gradio as gr
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+
10
+ from sklearn import cluster, datasets
11
+ from sklearn.preprocessing import StandardScaler
12
+ from itertools import cycle, islice
13
+
14
+
15
+
16
def train_models(selected_data, n_samples, n_clusters, n_neighbors, cls_name):
    """Cluster one toy dataset with one linkage strategy and plot the labels.

    Parameters
    ----------
    selected_data : str
        Dataset key: "Noisy Circles", "Noisy Moons", "Varied", "Aniso",
        "Blobs" or "No Structure".
    n_samples : int
        Number of points generated for each dataset.
    n_clusters : int
        Requested number of clusters. It is only applied to
        "Noisy Circles" and "Noisy Moons"; every other dataset keeps the
        default of 3.
    n_neighbors : int
        Recorded in the parameter dict for "Varied" and "Aniso". No
        estimator in this function reads it.
    cls_name : str
        Linkage key: "Single Linkage", "Average Linkage",
        "Complete Linkage" or "Ward Linkage".

    Returns
    -------
    matplotlib.figure.Figure
        Scatter plot of the standardized points coloured by cluster label.

    Raises
    ------
    KeyError
        If ``selected_data`` or ``cls_name`` is not one of the keys above.
    """
    # UI sliders may deliver floats; the dataset generators and the
    # estimator need real integers.
    n_samples = int(n_samples)
    n_clusters = int(n_clusters)

    np.random.seed(0)
    default_base = {"n_neighbors": 10, "n_clusters": 3}

    # All datasets are built eagerly and in this fixed order on purpose:
    # the generators without an explicit random_state draw from the seeded
    # global NumPy RNG, so the order keeps every dataset reproducible
    # regardless of which one is selected.
    noisy_circles = datasets.make_circles(
        n_samples=n_samples, factor=0.5, noise=0.05
    )
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
    no_structure = np.random.rand(n_samples, 2), None

    # Anisotropically distributed data
    random_state = 170
    X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    aniso = (np.dot(X, transformation), y)

    # Blobs with varied variances
    varied = datasets.make_blobs(
        n_samples=n_samples,
        cluster_std=[1.0, 2.5, 0.5],
        random_state=random_state,
    )

    # Each entry: (data, labels) pair plus per-dataset parameter overrides.
    dataset_list = {
        "Noisy Circles": [noisy_circles, {"n_clusters": n_clusters}],
        "Noisy Moons": [noisy_moons, {"n_clusters": n_clusters}],
        "Varied": [varied, {"n_neighbors": n_neighbors}],
        "Aniso": [aniso, {"n_neighbors": n_neighbors}],
        "Blobs": [blobs, {}],
        "No Structure": [no_structure, {}],
    }
    dataset, overrides = dataset_list[selected_data]

    params = default_base.copy()
    params.update(overrides)

    X, y = dataset
    X = StandardScaler().fit_transform(X)

    # Only the requested estimator is instantiated.
    linkage_by_name = {
        "Single Linkage": "single",
        "Average Linkage": "average",
        "Complete Linkage": "complete",
        "Ward Linkage": "ward",
    }
    algorithm = cluster.AgglomerativeClustering(
        n_clusters=params["n_clusters"], linkage=linkage_by_name[cls_name]
    )

    # catch warnings related to kneighbors_graph
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="the number of connected components of the "
            "connectivity matrix is [0-9]{1,2}"
            " > 1. Completing it to avoid stopping the tree early.",
            category=UserWarning,
        )
        algorithm.fit(X)

    # AgglomerativeClustering exposes its assignment through ``labels_``
    # after fitting, so no ``predict`` fallback is needed.
    y_pred = algorithm.labels_.astype(int)

    palette = [
        "#377eb8",
        "#ff7f00",
        "#4daf4a",
        "#f781bf",
        "#a65628",
        "#984ea3",
        "#999999",
        "#e41a1c",
        "#dede00",
    ]
    # Repeat the palette if there are more labels than base colours.
    colors = np.array(list(islice(cycle(palette), int(y_pred.max()) + 1)))

    fig, ax = plt.subplots()
    ax.scatter(X[:, 0], X[:, 1], color=colors[y_pred])
    ax.set_xlim(-2.5, 2.5)
    ax.set_ylim(-2.5, 2.5)
    ax.set_xticks(())
    ax.set_yticks(())

    # Deregister the figure from pyplot's global registry so figures do not
    # pile up across callbacks; the Figure object itself remains usable by
    # the caller.
    plt.close(fig)
    return fig
121
+
122
+
123
def iter_grid(n_rows, n_cols):
    """Yield once per cell of an ``n_rows`` x ``n_cols`` Gradio layout grid.

    Each ``yield`` happens while a ``gr.Column`` nested inside a ``gr.Row``
    is the active context, so components the caller creates between two
    iterations are placed in that cell. The yielded value is ``None``.
    """
    for _row_index in range(n_rows):
        row = gr.Row()
        with row:
            for _col_index in range(n_cols):
                cell = gr.Column()
                with cell:
                    yield
130
+
131
title = "Compare linkages in hierarchical clustering"
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown("This app demonstrates different linkage methods in"
                " hierarchical clustering")

    # One plot is created per linkage strategy listed here.
    input_models = [
        "Single Linkage",
        "Average Linkage",
        "Complete Linkage",
        "Ward Linkage",
    ]

    # Shared controls: changing any of them refreshes every plot.
    input_data = gr.Radio(
        choices=[
            "Noisy Circles",
            "Noisy Moons",
            "Varied",
            "Aniso",
            "Blobs",
            "No Structure",
        ],
        value="Noisy Moons",
    )
    n_samples = gr.Slider(
        minimum=500, maximum=2000, step=50, label="Number of Samples"
    )
    n_neighbors = gr.Slider(
        minimum=2, maximum=5, step=1, label="Number of neighbors"
    )
    n_clusters = gr.Slider(
        minimum=2, maximum=5, step=1, label="Number of Clusters"
    )

    # Positional order expected by train_models (cls_name is bound below).
    controls = [input_data, n_samples, n_clusters, n_neighbors]

    # The grid iterator comes first in zip() so it is advanced before the
    # model list runs out, which opens one extra (empty) cell after the
    # last plot.
    for _, input_model in zip(iter_grid(2, 5), input_models):
        plot = gr.Plot(label=input_model)
        fn = partial(train_models, cls_name=input_model)
        for control in (input_data, n_samples, n_neighbors, n_clusters):
            control.change(fn=fn, inputs=list(controls), outputs=plot)


demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ scikit-learn
2
+ matplotlib