eswardivi committed on
Commit
ae21ded
1 Parent(s): 36996eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -25
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import numpy as np
2
  import gradio as gr
3
 
4
- from sklearn.datasets import make_biclusters
5
- from sklearn.cluster import SpectralCoclustering
6
  from sklearn.metrics import consensus_score
7
 
8
  import plotly.express as px
@@ -11,10 +11,10 @@ import plotly.express as px
11
  score = [0.0]
12
 
13
 
14
- def dataset(n_clusters=5, noise=5, n_rows=300, n_cols=300):
15
- data, rows, columns = make_biclusters(
16
  shape=(n_rows, n_cols),
17
- n_clusters=n_clusters,
18
  noise=noise,
19
  shuffle=False,
20
  random_state=0,
@@ -24,10 +24,10 @@ def dataset(n_clusters=5, noise=5, n_rows=300, n_cols=300):
24
  return fig
25
 
26
 
27
- def shuffle_dataset(n_clusters=5, noise=5, n_rows=300, n_cols=300):
28
- data, rows, columns = make_biclusters(
29
  shape=(n_rows, n_cols),
30
- n_clusters=n_clusters,
31
  noise=noise,
32
  shuffle=False,
33
  random_state=0,
@@ -40,11 +40,19 @@ def shuffle_dataset(n_clusters=5, noise=5, n_rows=300, n_cols=300):
40
  return fig
41
 
42
 
43
- def model_fit(n_cluster, noise, n_rows, n_cols, n_clusters, svd_method):
44
-
45
- data, rows, columns = make_biclusters(
 
 
 
 
 
 
 
 
46
  shape=(n_rows, n_cols),
47
- n_clusters=n_cluster,
48
  noise=noise,
49
  shuffle=False,
50
  random_state=0,
@@ -55,8 +63,11 @@ def model_fit(n_cluster, noise, n_rows, n_cols, n_clusters, svd_method):
55
  col_idx = rng.permutation(data.shape[1])
56
  data = data[row_idx][:, col_idx]
57
  fig_shuffled = px.imshow(data, title="Shuffled Data")
58
- model = SpectralCoclustering(
59
- n_clusters=n_clusters, random_state=0, svd_method=svd_method
 
 
 
60
  )
61
  model.fit(data)
62
  score.append(
@@ -64,8 +75,14 @@ def model_fit(n_cluster, noise, n_rows, n_cols, n_clusters, svd_method):
64
  )
65
  fit_data = data[np.argsort(model.row_labels_)]
66
  fit_data = fit_data[:, np.argsort(model.column_labels_)].T
67
- fig = px.imshow(fit_data, title="After Co-Clustering")
68
- return fig_original, fig_shuffled, fig
 
 
 
 
 
 
69
 
70
 
71
  def get_score():
@@ -73,9 +90,9 @@ def get_score():
73
 
74
 
75
  with gr.Blocks() as demo:
76
- gr.Markdown("## Spectral Co-Clustering")
77
  gr.Markdown(
78
- "Demo is based on the [Spectral Co-Clustering](https://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html) example from scikit-learn. The goal of co-clustering is to find subgroups of rows and columns that are highly correlated. The data is first shuffled, then the rows and columns are reordered to match the biclusters. The consensus score is a measure of how well the biclusters found by the model match the true biclusters. The score is between 0 and 1, with 1 being a perfect match."
79
  )
80
 
81
  with gr.Tab("Data"):
@@ -84,18 +101,25 @@ with gr.Blocks() as demo:
84
  with gr.Row():
85
  n_rows = gr.Slider(1, 500, label="Number of Rows", value=300, step=1)
86
  n_cols = gr.Slider(1, 500, label="Number of Columns", value=300, step=1)
87
- n_cluster = gr.Slider(1, 50, label="Number of Clusters", value=5, step=1)
 
 
 
 
 
88
  noise = gr.Slider(0, 10, label="Noise", value=5, step=1)
89
  with gr.Row():
90
  gen_btn = gr.Button("Generate Data")
91
  shu_btn = gr.Button("Shuffle Data")
92
  with gr.Row():
93
  gen_btn.click(
94
- fn=dataset, inputs=[n_cluster, noise, n_rows, n_cols], outputs=gr.Plot()
 
 
95
  )
96
  shu_btn.click(
97
  fn=shuffle_dataset,
98
- inputs=[n_cluster, noise, n_rows, n_cols],
99
  outputs=gr.Plot(),
100
  )
101
 
@@ -105,11 +129,21 @@ with gr.Blocks() as demo:
105
  with gr.Row():
106
  n_rows = gr.Slider(1, 500, label="Number of Rows", value=300, step=1)
107
  n_cols = gr.Slider(1, 500, label="Number of Columns", value=300, step=1)
108
- n_cluster = gr.Slider(1, 50, label="Number of Clusters", value=5, step=1)
 
 
 
 
 
109
  noise = gr.Slider(0, 10, label="Noise", value=5, step=1)
110
  gr.Markdown("### Model Parameters")
111
  with gr.Row():
112
- n_clusters = gr.Slider(1, 50, label="Number of Clusters", value=5, step=1)
 
 
 
 
 
113
  svd_method = gr.Dropdown(
114
  ["randomized", "arpack"], label="SVD Method", value="randomized"
115
  )
@@ -117,8 +151,17 @@ with gr.Blocks() as demo:
117
  with gr.Row():
118
  model_btn.click(
119
  fn=model_fit,
120
- inputs=[n_cluster, noise, n_rows, n_cols, n_clusters, svd_method],
121
- outputs=[gr.Plot(), gr.Plot(), gr.Plot()],
 
 
 
 
 
 
 
 
 
122
  )
123
  gr.Markdown("### Consensus Score")
124
  score_btn = gr.Button("Get Score")
 
1
  import numpy as np
2
  import gradio as gr
3
 
4
+ from sklearn.datasets import make_checkerboard
5
+ from sklearn.cluster import SpectralBiclustering
6
  from sklearn.metrics import consensus_score
7
 
8
  import plotly.express as px
 
11
  score = [0.0]
12
 
13
 
14
+ def dataset(n_cluster_rows, n_cluster_cols, noise=5, n_rows=300, n_cols=300):
15
+ data, rows, columns = make_checkerboard(
16
  shape=(n_rows, n_cols),
17
+ n_clusters=(n_cluster_rows, n_cluster_cols),
18
  noise=noise,
19
  shuffle=False,
20
  random_state=0,
 
24
  return fig
25
 
26
 
27
+ def shuffle_dataset(n_cluster_rows, n_cluster_cols, noise=5, n_rows=300, n_cols=300):
28
+ data, rows, columns = make_checkerboard(
29
  shape=(n_rows, n_cols),
30
+ n_clusters=(n_cluster_rows, n_cluster_cols),
31
  noise=noise,
32
  shuffle=False,
33
  random_state=0,
 
40
  return fig
41
 
42
 
43
+ def model_fit(
44
+ n_cluster_rows,
45
+ n_cluster_cols,
46
+ noise,
47
+ n_rows,
48
+ n_cols,
49
+ n_cluster_rows_,
50
+ n_cluster_cols_,
51
+ svd_method,
52
+ ):
53
+ data, rows, columns = make_checkerboard(
54
  shape=(n_rows, n_cols),
55
+ n_clusters=(n_cluster_rows, n_cluster_cols),
56
  noise=noise,
57
  shuffle=False,
58
  random_state=0,
 
63
  col_idx = rng.permutation(data.shape[1])
64
  data = data[row_idx][:, col_idx]
65
  fig_shuffled = px.imshow(data, title="Shuffled Data")
66
+ model = SpectralBiclustering(
67
+ n_clusters=(n_cluster_rows_, n_cluster_cols_),
68
+ method="log",
69
+ random_state=0,
70
+ svd_method=svd_method,
71
  )
72
  model.fit(data)
73
  score.append(
 
75
  )
76
  fit_data = data[np.argsort(model.row_labels_)]
77
  fit_data = fit_data[:, np.argsort(model.column_labels_)].T
78
+ fig = px.imshow(fit_data, title="After Bi-Clustering")
79
+
80
+ fig_1 = px.imshow(
81
+ np.outer(np.sort(model.row_labels_) + 1, np.sort(model.column_labels_) + 1),
82
+ title="Checkerboard structure of rearranged data",
83
+ )
84
+
85
+ return fig_original, fig_shuffled, fig, fig_1
86
 
87
 
88
  def get_score():
 
90
 
91
 
92
  with gr.Blocks() as demo:
93
+ gr.Markdown("## Spectral Bi-Clustering")
94
  gr.Markdown(
95
+ "Demo is based on the [Spectral Bi-Clustering](https://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html) example from scikit-learn. The goal of co-clustering is to find subgroups of rows and columns that are highly correlated. The data is first shuffled, then the rows and columns are reordered to match the biclusters. The consensus score is a measure of how well the biclusters found by the model match the true biclusters. The score is between 0 and 1, with 1 being a perfect match."
96
  )
97
 
98
  with gr.Tab("Data"):
 
101
  with gr.Row():
102
  n_rows = gr.Slider(1, 500, label="Number of Rows", value=300, step=1)
103
  n_cols = gr.Slider(1, 500, label="Number of Columns", value=300, step=1)
104
+ n_cluster_rows = gr.Slider(
105
+ 1, 50, label="Number of Clusters Rows", value=5, step=1
106
+ )
107
+ n_cluster_cols = gr.Slider(
108
+ 1, 50, label="Number of Clusters Columns", value=5, step=1
109
+ )
110
  noise = gr.Slider(0, 10, label="Noise", value=5, step=1)
111
  with gr.Row():
112
  gen_btn = gr.Button("Generate Data")
113
  shu_btn = gr.Button("Shuffle Data")
114
  with gr.Row():
115
  gen_btn.click(
116
+ fn=dataset,
117
+ inputs=[n_cluster_rows, n_cluster_cols, noise, n_rows, n_cols],
118
+ outputs=gr.Plot(),
119
  )
120
  shu_btn.click(
121
  fn=shuffle_dataset,
122
+ inputs=[n_cluster_rows, n_cluster_cols, noise, n_rows, n_cols],
123
  outputs=gr.Plot(),
124
  )
125
 
 
129
  with gr.Row():
130
  n_rows = gr.Slider(1, 500, label="Number of Rows", value=300, step=1)
131
  n_cols = gr.Slider(1, 500, label="Number of Columns", value=300, step=1)
132
+ n_cluster_rows = gr.Slider(
133
+ 1, 50, label="Number of Clusters Rows", value=5, step=1
134
+ )
135
+ n_cluster_cols = gr.Slider(
136
+ 1, 50, label="Number of Clusters Columns", value=5, step=1
137
+ )
138
  noise = gr.Slider(0, 10, label="Noise", value=5, step=1)
139
  gr.Markdown("### Model Parameters")
140
  with gr.Row():
141
+ n_cluster_rows_ = gr.Slider(
142
+ 1, 50, label="Number of Clusters Rows", value=5, step=1
143
+ )
144
+ n_cluster_cols_ = gr.Slider(
145
+ 1, 50, label="Number of Clusters Columns", value=5, step=1
146
+ )
147
  svd_method = gr.Dropdown(
148
  ["randomized", "arpack"], label="SVD Method", value="randomized"
149
  )
 
151
  with gr.Row():
152
  model_btn.click(
153
  fn=model_fit,
154
+ inputs=[
155
+ n_cluster_rows,
156
+ n_cluster_cols,
157
+ noise,
158
+ n_rows,
159
+ n_cols,
160
+ n_cluster_rows_,
161
+ n_cluster_cols_,
162
+ svd_method,
163
+ ],
164
+ outputs=[gr.Plot(), gr.Plot(), gr.Plot(), gr.Plot()],
165
  )
166
  gr.Markdown("### Consensus Score")
167
  score_btn = gr.Button("Get Score")