adamnarozniak committed on
Commit
3a3be3b
·
verified ·
1 Parent(s): a44b795

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +361 -0
app.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from flwr_datasets import FederatedDataset
3
+ from flwr_datasets.partitioner import (
4
+ DirichletPartitioner,
5
+ IidPartitioner,
6
+ PathologicalPartitioner,
7
+ ShardPartitioner,
8
+ LinearPartitioner,
9
+ SquarePartitioner,
10
+ ExponentialPartitioner,
11
+ NaturalIdPartitioner
12
+ )
13
+ from flwr_datasets.visualization import plot_label_distributions
14
+ import matplotlib.pyplot as plt
15
+
# Registry of the supported partitioners, keyed by class name. The key order
# is preserved and is the order in which the names are offered in the
# "Partitioner" dropdown.
partitioner_types = dict(
    DirichletPartitioner=DirichletPartitioner,
    IidPartitioner=IidPartitioner,
    PathologicalPartitioner=PathologicalPartitioner,
    ShardPartitioner=ShardPartitioner,
    LinearPartitioner=LinearPartitioner,
    SquarePartitioner=SquarePartitioner,
    ExponentialPartitioner=ExponentialPartitioner,
    NaturalIdPartitioner=NaturalIdPartitioner,
)
26
+
# For every partitioner name, the constructor parameters that the UI has to
# collect. `update_parameter_visibility` consults this table to decide which
# input widgets are shown for the selected partitioner.
partitioner_parameters = {
    "DirichletPartitioner": [
        "num_partitions",
        "alpha",
        "partition_by",
        "min_partition_size",
        "self_balancing",
    ],
    "IidPartitioner": ["num_partitions"],
    "PathologicalPartitioner": [
        "num_partitions",
        "partition_by",
        "num_classes_per_partition",
        "class_assignment_mode",
    ],
    "ShardPartitioner": [
        "num_partitions",
        "partition_by",
        "num_shards_per_partition",
        "shard_size",
        "keep_incomplete_shard",
    ],
    "NaturalIdPartitioner": ["partition_by"],
    "LinearPartitioner": ["num_partitions"],
    "SquarePartitioner": ["num_partitions"],
    "ExponentialPartitioner": ["num_partitions"],
}
37
+
def update_parameter_visibility(partitioner_type):
    """Return one visibility update per partitioner-parameter widget.

    Args:
        partitioner_type: Name of the partitioner selected in the dropdown
            (a key of ``partitioner_parameters``). An unknown name is treated
            as requiring no parameters, so every widget is hidden.

    Returns:
        A list of ten ``gr.update(visible=...)`` objects, one per parameter
        widget, ``visible=True`` exactly for the parameters the selected
        partitioner needs.
    """
    # The order here must stay in sync with the `outputs` list of the
    # `partitioner_type_input.change(...)` handler: the returned updates are
    # matched to the output components by position.
    widget_params = (
        "num_partitions",
        "alpha",
        "partition_by",
        "min_partition_size",
        "self_balancing",
        "num_classes_per_partition",
        "class_assignment_mode",
        "num_shards_per_partition",
        "shard_size",
        "keep_incomplete_shard",
    )
    required_params = partitioner_parameters.get(partitioner_type, [])
    return [gr.update(visible=name in required_params) for name in widget_params]
94
+
def _build_partitioner_params(
    partitioner_type,
    num_partitions,
    alpha,
    partition_by,
    min_partition_size,
    self_balancing,
    num_classes_per_partition,
    class_assignment_mode,
    num_shards_per_partition,
    shard_size,
    keep_incomplete_shard,
):
    """Return the constructor kwargs for the selected partitioner type.

    Only the values relevant to `partitioner_type` are converted, so a bad
    value in a field the partitioner does not use cannot raise here. An
    unrecognised type yields an empty dict.
    """
    if partitioner_type == "DirichletPartitioner":
        return {
            "num_partitions": int(num_partitions),
            "partition_by": partition_by,
            "alpha": float(alpha),
            "min_partition_size": int(min_partition_size),
            "self_balancing": self_balancing,
        }
    if partitioner_type == "PathologicalPartitioner":
        return {
            "num_partitions": int(num_partitions),
            "partition_by": partition_by,
            "num_classes_per_partition": int(num_classes_per_partition),
            "class_assignment_mode": class_assignment_mode,
        }
    if partitioner_type == "ShardPartitioner":
        return {
            "num_partitions": int(num_partitions),
            "partition_by": partition_by,
            "num_shards_per_partition": int(num_shards_per_partition),
            "shard_size": int(shard_size),
            # The radio widget delivers the strings "True"/"False".
            "keep_incomplete_shard": keep_incomplete_shard == "True",
        }
    if partitioner_type == "NaturalIdPartitioner":
        return {"partition_by": partition_by}
    if partitioner_type in (
        "IidPartitioner",
        "LinearPartitioner",
        "SquarePartitioner",
        "ExponentialPartitioner",
    ):
        return {"num_partitions": int(num_partitions)}
    return {}


def _render_example_code(
    partitioner_type,
    partitioner_params,
    dataset,
    label_name,
    title,
    legend,
    verbose_labels,
    size_unit,
    partition_id_axis,
):
    """Return a Python snippet that reproduces the current partitioning and plot."""
    rendered_params = []
    for key, value in partitioner_params.items():
        if isinstance(value, str):
            value = f'"{value}"'
        rendered_params.append(f"\t{key} = {value}")
    # One parameter per line, comma-separated, no trailing comma.
    partitioner_params_str = "\n" + ",\n".join(rendered_params) + "\n"

    return f"""
from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import {partitioner_type}
from flwr_datasets.visualization import plot_label_distributions

partitioner = {partitioner_type}({partitioner_params_str})
fds = FederatedDataset(
    dataset="{dataset}",
    partitioners={{
        "train": partitioner,
    }},
    trust_remote_code=True,
)
partitioner = fds.partitioners["train"]
figure, axis, dataframe = plot_label_distributions(
    partitioner=partitioner,
    label_name="{label_name}",
    title="{title}",
    legend={legend},
    verbose_labels={verbose_labels},
    size_unit="{size_unit}",
    partition_id_axis="{partition_id_axis}",
)
"""


def partition_and_plot(
    dataset,
    partitioner_type,
    num_partitions,
    alpha,
    partition_by,
    min_partition_size,
    self_balancing,
    num_classes_per_partition,
    class_assignment_mode,
    num_shards_per_partition,
    shard_size,
    keep_incomplete_shard,
    label_name,
    title,
    legend,
    verbose_labels,
    size_unit,
    partition_id_axis,
):
    """Partition the "train" split of a dataset and plot its label distribution.

    Args:
        dataset: Dataset name passed to ``FederatedDataset``.
        partitioner_type: Key of ``partitioner_types`` selecting the
            partitioner class.
        num_partitions, alpha, partition_by, min_partition_size,
        self_balancing, num_classes_per_partition, class_assignment_mode,
        num_shards_per_partition, shard_size, keep_incomplete_shard:
            Raw widget values; only those the selected partitioner uses are
            converted and forwarded to its constructor.
        label_name, title, legend, verbose_labels, size_unit,
        partition_id_axis: Forwarded to ``plot_label_distributions``.

    Returns:
        A 2-tuple matching the two outputs bound to the submit button:
        ``(plot_filename, code)`` on success, where ``code`` is a snippet
        reproducing the run, or ``(None, "Error: <message>")`` if anything
        raised.
    """
    try:
        partitioner_params = _build_partitioner_params(
            partitioner_type,
            num_partitions,
            alpha,
            partition_by,
            min_partition_size,
            self_balancing,
            num_classes_per_partition,
            class_assignment_mode,
            num_shards_per_partition,
            shard_size,
            keep_incomplete_shard,
        )

        partitioner_class = partitioner_types[partitioner_type]
        partitioner = partitioner_class(**partitioner_params)
        # NOTE(review): `dataset` comes straight from a UI textbox and is
        # loaded with trust_remote_code=True — confirm this is acceptable for
        # the environment the app is deployed in.
        fds = FederatedDataset(
            dataset=dataset,
            partitioners={
                "train": partitioner,
            },
            trust_remote_code=True,
        )
        partitioner = fds.partitioners["train"]
        figure, axis, dataframe = plot_label_distributions(
            partitioner=partitioner,
            label_name=label_name,
            title=title,
            legend=legend,
            verbose_labels=verbose_labels,
            size_unit=size_unit,
            partition_id_axis=partition_id_axis,
        )

        # Save plot to a file.
        # NOTE(review): the file name is fixed, so simultaneous requests write
        # to the same path — verify this is fine for the expected usage.
        plot_filename = "label_distribution.png"
        try:
            figure.savefig(plot_filename, bbox_inches="tight")
        finally:
            # Release the figure so figures do not pile up across requests
            # (a no-op if the figure is not registered with pyplot).
            plt.close(figure)

        code = _render_example_code(
            partitioner_type,
            partitioner_params,
            dataset,
            label_name,
            title,
            legend,
            verbose_labels,
            size_unit,
            partition_id_axis,
        )
        return plot_filename, code
    except Exception as e:
        # UI boundary: report the failure in the code panel instead of
        # crashing the handler. Same arity as the success path.
        return None, f"Error: {e}"
219
+
# Gradio interface definition.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a listing without indentation — confirm in particular that the example
# tables and the documentation link belong inside the right-hand column.
with gr.Blocks() as demo:
    gr.Markdown("# Federated Dataset: Partitioning Visualization")
    gr.Markdown(
        "See partitioned datasets for Federated Learning experiments. "
        "The partitioning and visualization was created using `flwr-datasets`."
    )

    with gr.Row():
        # Left column: dataset / partitioner settings and plot settings.
        with gr.Column(scale=1):
            with gr.Accordion("Federated Dataset Parameters", open=True):
                dataset_input = gr.Textbox(label="Dataset", value="cifar10")
                partitioner_type_input = gr.Dropdown(
                    label="Partitioner",
                    choices=list(partitioner_types),
                    value="DirichletPartitioner",
                )

                # Shown initially: the parameters of the default
                # DirichletPartitioner selection.
                num_partitions_input = gr.Number(
                    label="num_partitions", value=10, visible=True
                )
                alpha_input = gr.Number(label="alpha", value=0.3, visible=True)
                partition_by_input = gr.Textbox(
                    label="partition_by", value="label", visible=True
                )
                min_partition_size_input = gr.Number(
                    label="min_partition_size", value=0, visible=True
                )
                self_balancing_input = gr.Radio(
                    label="self_balancing",
                    choices=[True, False],
                    value=False,
                    visible=True,
                )

                # Hidden initially: parameters of the other partitioners.
                num_classes_per_partition_input = gr.Number(
                    label="num_classes_per_partition", value=2, visible=False
                )
                class_assignment_mode_input = gr.Dropdown(
                    label="class_assignment_mode",
                    choices=["random", "first-deterministic", "deterministic"],
                    value="first-deterministic",
                    visible=False,
                )
                num_shards_per_partition_input = gr.Number(
                    label="num_shards_per_partition", value=2, visible=False
                )
                shard_size_input = gr.Number(
                    label="shard_size", value=0, visible=False
                )
                keep_incomplete_shard_input = gr.Radio(
                    label="keep_incomplete_shard",
                    choices=["True", "False"],
                    value="True",
                    visible=False,
                )

            with gr.Accordion("Plot Parameters", open=False):
                label_name = gr.Textbox(label="label_name", value="label")
                title = gr.Textbox(
                    label="title", value="Per Partition Label Distribution"
                )
                legend = gr.Radio(label="legend", choices=[True, False], value=True)
                verbose_labels = gr.Radio(
                    label="verbose_labels", choices=[True, False], value=True
                )
                size_unit = gr.Radio(
                    label="size_unit",
                    choices=["absolute", "percent"],
                    value="absolute",
                )
                partition_id_axis = gr.Radio(
                    label="partition_id_axis", choices=["x", "y"], value="x"
                )

            # Partitioner-specific widgets, in the order in which
            # `update_parameter_visibility` returns its updates and in which
            # `partition_and_plot` takes them as arguments.
            partitioner_param_inputs = [
                num_partitions_input,
                alpha_input,
                partition_by_input,
                min_partition_size_input,
                self_balancing_input,
                num_classes_per_partition_input,
                class_assignment_mode_input,
                num_shards_per_partition_input,
                shard_size_input,
                keep_incomplete_shard_input,
            ]

            # Show or hide parameter widgets whenever another partitioner is
            # selected.
            partitioner_type_input.change(
                fn=update_parameter_visibility,
                inputs=[partitioner_type_input],
                outputs=list(partitioner_param_inputs),
            )

        # Right column: plot, generated code and ready-made examples.
        with gr.Column(scale=3):
            gr.Markdown("## Label Distribution Plot")
            plot_output = gr.Image(label="Label Distribution Plot")
            submit_button = gr.Button("Partition and Plot", variant="primary")
            # download_button = gr.DownloadButton(label="Download Plot", value="label_distribution.png")
            gr.Markdown("## Code")
            code_output = gr.Code(label="Code", language="python")
            # Uncomment to show dataframe (note that it only works with header that is of type "string")
            # gr.Markdown("## Partitioning DataFrame")
            # dataframe_output = gr.Dataframe(label="Partitioning DataFrame")

            size_skew_examples = gr.Examples(
                label="Size Skew Examples",
                inputs=[
                    dataset_input,
                    partitioner_type_input,
                    num_partitions_input,
                ],
                examples=[
                    ["cifar10", "IidPartitioner", 10],
                    ["cifar10", "LinearPartitioner", 10],
                    ["cifar10", "SquarePartitioner", 10],
                    ["cifar10", "ExponentialPartitioner", 10],
                ],
            )

            dirichlet_examples = gr.Examples(
                label="Dirichlet Examples",
                inputs=[
                    dataset_input,
                    partitioner_type_input,
                    num_partitions_input,
                    alpha_input,
                    partition_by_input,
                    min_partition_size_input,
                    self_balancing_input,
                    size_unit,
                ],
                examples=[
                    ["cifar10", "DirichletPartitioner", 10, 0.1, "label", 0, False, "absolute"],
                    ["cifar10", "DirichletPartitioner", 10, 0.1, "label", 0, False, "percent"],
                ],
            )

            pathological_examples = gr.Examples(
                label="Pathological Examples",
                inputs=[
                    dataset_input,
                    partitioner_type_input,
                    num_partitions_input,
                    num_classes_per_partition_input,
                    class_assignment_mode_input,
                    partition_by_input,
                ],
                examples=[
                    ["cifar10", "PathologicalPartitioner", 10, 2, "first-deterministic", "label"],
                    ["cifar10", "PathologicalPartitioner", 10, 3, "deterministic", "label"],
                ],
            )
            markdown = gr.Markdown("See more tutorial, examples and documentation on [https://flower.ai/docs/datasets/index.html](https://flower.ai/docs/datasets/index.html).")

    # Run the partitioning when the button is pressed. The inputs follow the
    # positional parameter order of `partition_and_plot`; the two outputs
    # receive the plot file and the generated code.
    submit_button.click(
        fn=partition_and_plot,
        inputs=[
            dataset_input,
            partitioner_type_input,
            *partitioner_param_inputs,
            label_name,
            title,
            legend,
            verbose_labels,
            size_unit,
            partition_id_axis,
        ],
        outputs=[
            plot_output,
            code_output,
            # dataframe_output,
            # download_button
        ],
    )

if __name__ == "__main__":
    demo.launch()