MohamedRashad committed on
Commit
3f18618
1 Parent(s): e27f26a
Coding_Challenge_for_Fatima_Fellowship.ipynb ADDED
@@ -0,0 +1,1294 @@
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "Coding Challenge for Fatima Fellowship",
7
+ "provenance": [],
8
+ "collapsed_sections": []
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "accelerator": "GPU",
15
+ "widgets": {
16
+ "application/vnd.jupyter.widget-state+json": {
17
+ "b6f3481b22fa43758c6273992d49f800": {
18
+ "model_module": "@jupyter-widgets/controls",
19
+ "model_name": "HBoxModel",
20
+ "model_module_version": "1.5.0",
21
+ "state": {
22
+ "_dom_classes": [],
23
+ "_model_module": "@jupyter-widgets/controls",
24
+ "_model_module_version": "1.5.0",
25
+ "_model_name": "HBoxModel",
26
+ "_view_count": null,
27
+ "_view_module": "@jupyter-widgets/controls",
28
+ "_view_module_version": "1.5.0",
29
+ "_view_name": "HBoxView",
30
+ "box_style": "",
31
+ "children": [
32
+ "IPY_MODEL_163c4c11370d46e39fed08c38cc64965",
33
+ "IPY_MODEL_90467627aa984891907e43169ebf2c64",
34
+ "IPY_MODEL_c5bbd049370f4925a6ad3a548341512f"
35
+ ],
36
+ "layout": "IPY_MODEL_469c0688a66c4140b0c22402877bfc31"
37
+ }
38
+ },
39
+ "163c4c11370d46e39fed08c38cc64965": {
40
+ "model_module": "@jupyter-widgets/controls",
41
+ "model_name": "HTMLModel",
42
+ "model_module_version": "1.5.0",
43
+ "state": {
44
+ "_dom_classes": [],
45
+ "_model_module": "@jupyter-widgets/controls",
46
+ "_model_module_version": "1.5.0",
47
+ "_model_name": "HTMLModel",
48
+ "_view_count": null,
49
+ "_view_module": "@jupyter-widgets/controls",
50
+ "_view_module_version": "1.5.0",
51
+ "_view_name": "HTMLView",
52
+ "description": "",
53
+ "description_tooltip": null,
54
+ "layout": "IPY_MODEL_e6b62974b70349caa135ad10e461c546",
55
+ "placeholder": "​",
56
+ "style": "IPY_MODEL_b6886b2850104e719ec862a29a977548",
57
+ "value": "100%"
58
+ }
59
+ },
60
+ "90467627aa984891907e43169ebf2c64": {
61
+ "model_module": "@jupyter-widgets/controls",
62
+ "model_name": "FloatProgressModel",
63
+ "model_module_version": "1.5.0",
64
+ "state": {
65
+ "_dom_classes": [],
66
+ "_model_module": "@jupyter-widgets/controls",
67
+ "_model_module_version": "1.5.0",
68
+ "_model_name": "FloatProgressModel",
69
+ "_view_count": null,
70
+ "_view_module": "@jupyter-widgets/controls",
71
+ "_view_module_version": "1.5.0",
72
+ "_view_name": "ProgressView",
73
+ "bar_style": "success",
74
+ "description": "",
75
+ "description_tooltip": null,
76
+ "layout": "IPY_MODEL_2f72dc13ee934d0cb360b7b6fbfdbe1b",
77
+ "max": 2,
78
+ "min": 0,
79
+ "orientation": "horizontal",
80
+ "style": "IPY_MODEL_1c011147729f4bbfba0b6ef1455144de",
81
+ "value": 2
82
+ }
83
+ },
84
+ "c5bbd049370f4925a6ad3a548341512f": {
85
+ "model_module": "@jupyter-widgets/controls",
86
+ "model_name": "HTMLModel",
87
+ "model_module_version": "1.5.0",
88
+ "state": {
89
+ "_dom_classes": [],
90
+ "_model_module": "@jupyter-widgets/controls",
91
+ "_model_module_version": "1.5.0",
92
+ "_model_name": "HTMLModel",
93
+ "_view_count": null,
94
+ "_view_module": "@jupyter-widgets/controls",
95
+ "_view_module_version": "1.5.0",
96
+ "_view_name": "HTMLView",
97
+ "description": "",
98
+ "description_tooltip": null,
99
+ "layout": "IPY_MODEL_2b071dcb5f9a4291bef5d9ff9f7053c5",
100
+ "placeholder": "​",
101
+ "style": "IPY_MODEL_efba12ce4437427bb45b578709e85c32",
102
+ "value": " 2/2 [00:00<00:00, 46.22it/s]"
103
+ }
104
+ },
105
+ "469c0688a66c4140b0c22402877bfc31": {
106
+ "model_module": "@jupyter-widgets/base",
107
+ "model_name": "LayoutModel",
108
+ "model_module_version": "1.2.0",
109
+ "state": {
110
+ "_model_module": "@jupyter-widgets/base",
111
+ "_model_module_version": "1.2.0",
112
+ "_model_name": "LayoutModel",
113
+ "_view_count": null,
114
+ "_view_module": "@jupyter-widgets/base",
115
+ "_view_module_version": "1.2.0",
116
+ "_view_name": "LayoutView",
117
+ "align_content": null,
118
+ "align_items": null,
119
+ "align_self": null,
120
+ "border": null,
121
+ "bottom": null,
122
+ "display": null,
123
+ "flex": null,
124
+ "flex_flow": null,
125
+ "grid_area": null,
126
+ "grid_auto_columns": null,
127
+ "grid_auto_flow": null,
128
+ "grid_auto_rows": null,
129
+ "grid_column": null,
130
+ "grid_gap": null,
131
+ "grid_row": null,
132
+ "grid_template_areas": null,
133
+ "grid_template_columns": null,
134
+ "grid_template_rows": null,
135
+ "height": null,
136
+ "justify_content": null,
137
+ "justify_items": null,
138
+ "left": null,
139
+ "margin": null,
140
+ "max_height": null,
141
+ "max_width": null,
142
+ "min_height": null,
143
+ "min_width": null,
144
+ "object_fit": null,
145
+ "object_position": null,
146
+ "order": null,
147
+ "overflow": null,
148
+ "overflow_x": null,
149
+ "overflow_y": null,
150
+ "padding": null,
151
+ "right": null,
152
+ "top": null,
153
+ "visibility": null,
154
+ "width": null
155
+ }
156
+ },
157
+ "e6b62974b70349caa135ad10e461c546": {
158
+ "model_module": "@jupyter-widgets/base",
159
+ "model_name": "LayoutModel",
160
+ "model_module_version": "1.2.0",
161
+ "state": {
162
+ "_model_module": "@jupyter-widgets/base",
163
+ "_model_module_version": "1.2.0",
164
+ "_model_name": "LayoutModel",
165
+ "_view_count": null,
166
+ "_view_module": "@jupyter-widgets/base",
167
+ "_view_module_version": "1.2.0",
168
+ "_view_name": "LayoutView",
169
+ "align_content": null,
170
+ "align_items": null,
171
+ "align_self": null,
172
+ "border": null,
173
+ "bottom": null,
174
+ "display": null,
175
+ "flex": null,
176
+ "flex_flow": null,
177
+ "grid_area": null,
178
+ "grid_auto_columns": null,
179
+ "grid_auto_flow": null,
180
+ "grid_auto_rows": null,
181
+ "grid_column": null,
182
+ "grid_gap": null,
183
+ "grid_row": null,
184
+ "grid_template_areas": null,
185
+ "grid_template_columns": null,
186
+ "grid_template_rows": null,
187
+ "height": null,
188
+ "justify_content": null,
189
+ "justify_items": null,
190
+ "left": null,
191
+ "margin": null,
192
+ "max_height": null,
193
+ "max_width": null,
194
+ "min_height": null,
195
+ "min_width": null,
196
+ "object_fit": null,
197
+ "object_position": null,
198
+ "order": null,
199
+ "overflow": null,
200
+ "overflow_x": null,
201
+ "overflow_y": null,
202
+ "padding": null,
203
+ "right": null,
204
+ "top": null,
205
+ "visibility": null,
206
+ "width": null
207
+ }
208
+ },
209
+ "b6886b2850104e719ec862a29a977548": {
210
+ "model_module": "@jupyter-widgets/controls",
211
+ "model_name": "DescriptionStyleModel",
212
+ "model_module_version": "1.5.0",
213
+ "state": {
214
+ "_model_module": "@jupyter-widgets/controls",
215
+ "_model_module_version": "1.5.0",
216
+ "_model_name": "DescriptionStyleModel",
217
+ "_view_count": null,
218
+ "_view_module": "@jupyter-widgets/base",
219
+ "_view_module_version": "1.2.0",
220
+ "_view_name": "StyleView",
221
+ "description_width": ""
222
+ }
223
+ },
224
+ "2f72dc13ee934d0cb360b7b6fbfdbe1b": {
225
+ "model_module": "@jupyter-widgets/base",
226
+ "model_name": "LayoutModel",
227
+ "model_module_version": "1.2.0",
228
+ "state": {
229
+ "_model_module": "@jupyter-widgets/base",
230
+ "_model_module_version": "1.2.0",
231
+ "_model_name": "LayoutModel",
232
+ "_view_count": null,
233
+ "_view_module": "@jupyter-widgets/base",
234
+ "_view_module_version": "1.2.0",
235
+ "_view_name": "LayoutView",
236
+ "align_content": null,
237
+ "align_items": null,
238
+ "align_self": null,
239
+ "border": null,
240
+ "bottom": null,
241
+ "display": null,
242
+ "flex": null,
243
+ "flex_flow": null,
244
+ "grid_area": null,
245
+ "grid_auto_columns": null,
246
+ "grid_auto_flow": null,
247
+ "grid_auto_rows": null,
248
+ "grid_column": null,
249
+ "grid_gap": null,
250
+ "grid_row": null,
251
+ "grid_template_areas": null,
252
+ "grid_template_columns": null,
253
+ "grid_template_rows": null,
254
+ "height": null,
255
+ "justify_content": null,
256
+ "justify_items": null,
257
+ "left": null,
258
+ "margin": null,
259
+ "max_height": null,
260
+ "max_width": null,
261
+ "min_height": null,
262
+ "min_width": null,
263
+ "object_fit": null,
264
+ "object_position": null,
265
+ "order": null,
266
+ "overflow": null,
267
+ "overflow_x": null,
268
+ "overflow_y": null,
269
+ "padding": null,
270
+ "right": null,
271
+ "top": null,
272
+ "visibility": null,
273
+ "width": null
274
+ }
275
+ },
276
+ "1c011147729f4bbfba0b6ef1455144de": {
277
+ "model_module": "@jupyter-widgets/controls",
278
+ "model_name": "ProgressStyleModel",
279
+ "model_module_version": "1.5.0",
280
+ "state": {
281
+ "_model_module": "@jupyter-widgets/controls",
282
+ "_model_module_version": "1.5.0",
283
+ "_model_name": "ProgressStyleModel",
284
+ "_view_count": null,
285
+ "_view_module": "@jupyter-widgets/base",
286
+ "_view_module_version": "1.2.0",
287
+ "_view_name": "StyleView",
288
+ "bar_color": null,
289
+ "description_width": ""
290
+ }
291
+ },
292
+ "2b071dcb5f9a4291bef5d9ff9f7053c5": {
293
+ "model_module": "@jupyter-widgets/base",
294
+ "model_name": "LayoutModel",
295
+ "model_module_version": "1.2.0",
296
+ "state": {
297
+ "_model_module": "@jupyter-widgets/base",
298
+ "_model_module_version": "1.2.0",
299
+ "_model_name": "LayoutModel",
300
+ "_view_count": null,
301
+ "_view_module": "@jupyter-widgets/base",
302
+ "_view_module_version": "1.2.0",
303
+ "_view_name": "LayoutView",
304
+ "align_content": null,
305
+ "align_items": null,
306
+ "align_self": null,
307
+ "border": null,
308
+ "bottom": null,
309
+ "display": null,
310
+ "flex": null,
311
+ "flex_flow": null,
312
+ "grid_area": null,
313
+ "grid_auto_columns": null,
314
+ "grid_auto_flow": null,
315
+ "grid_auto_rows": null,
316
+ "grid_column": null,
317
+ "grid_gap": null,
318
+ "grid_row": null,
319
+ "grid_template_areas": null,
320
+ "grid_template_columns": null,
321
+ "grid_template_rows": null,
322
+ "height": null,
323
+ "justify_content": null,
324
+ "justify_items": null,
325
+ "left": null,
326
+ "margin": null,
327
+ "max_height": null,
328
+ "max_width": null,
329
+ "min_height": null,
330
+ "min_width": null,
331
+ "object_fit": null,
332
+ "object_position": null,
333
+ "order": null,
334
+ "overflow": null,
335
+ "overflow_x": null,
336
+ "overflow_y": null,
337
+ "padding": null,
338
+ "right": null,
339
+ "top": null,
340
+ "visibility": null,
341
+ "width": null
342
+ }
343
+ },
344
+ "efba12ce4437427bb45b578709e85c32": {
345
+ "model_module": "@jupyter-widgets/controls",
346
+ "model_name": "DescriptionStyleModel",
347
+ "model_module_version": "1.5.0",
348
+ "state": {
349
+ "_model_module": "@jupyter-widgets/controls",
350
+ "_model_module_version": "1.5.0",
351
+ "_model_name": "DescriptionStyleModel",
352
+ "_view_count": null,
353
+ "_view_module": "@jupyter-widgets/base",
354
+ "_view_module_version": "1.2.0",
355
+ "_view_name": "StyleView",
356
+ "description_width": ""
357
+ }
358
+ }
359
+ }
360
+ }
361
+ },
362
+ "cells": [
363
+ {
364
+ "cell_type": "markdown",
365
+ "metadata": {
366
+ "id": "eBpjBBZc6IvA"
367
+ },
368
+ "source": [
369
+ "# Fatima Fellowship Quick Coding Challenge (Pick 1)\n",
370
+ "\n",
371
+ "Thank you for applying to the Fatima Fellowship. To help us select the Fellows and assess your ability to do machine learning research, we are asking that you complete a short coding challenge. Please pick **1 of these 5** coding challenges, whichever is most aligned with your interests. \n",
372
+ "\n",
373
+ "**Due date: 1 week**\n",
374
+ "\n",
375
+ "**How to submit**: Please make a copy of this colab notebook, add your code and results, and submit your colab notebook to the submission link below. If you have never used a colab notebook, [check out this video](https://www.youtube.com/watch?v=i-HnvsehuSw).\n",
376
+ "\n",
377
+ "**Submission link**: https://airtable.com/shrXy3QKSsO2yALd3"
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "markdown",
382
+ "metadata": {
383
+ "id": "braBzmRpMe7_"
384
+ },
385
+ "source": [
386
+ "# 1. Deep Learning for Vision"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "markdown",
391
+ "metadata": {
392
+ "id": "1IWw-NZf5WfF"
393
+ },
394
+ "source": [
395
+ "**Upside down detector**: Train a model to detect if images are upside down\n",
396
+ "\n",
397
+ "* Pick a dataset of natural images (we suggest looking at datasets on the [Hugging Face Hub](https://huggingface.co/datasets?task_categories=task_categories:image-classification&sort=downloads))\n",
398
+ "* Synthetically turn some of images upside down. Create a training and test set.\n",
399
+ "* Build a neural network (using Tensorflow, PyTorch, or any framework you like)\n",
400
+ "* Train it to classify image orientation until a reasonable accuracy is reached\n",
401
+ "* [Upload the the model to the Hugging Face Hub](https://huggingface.co/docs/hub/adding-a-model), and add a link to your model below.\n",
402
+ "* Look at some of the images that were classified incorrectly. Please explain what you might do to improve your model's performance on these images in the future (you do not need to impelement these suggestions)\n",
403
+ "\n",
404
+ "**Submission instructions**: Please write your code below and include some examples of images that were classified"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "markdown",
409
+ "source": [
410
+ "## Self-Supervised Learning for the rescue"
411
+ ],
412
+ "metadata": {
413
+ "id": "nI7wrS6ETRx1"
414
+ }
415
+ },
416
+ {
417
+ "cell_type": "code",
418
+ "source": [
419
+ "# !pip install opencv-python-headless==4.5.2.52\n",
420
+ "# !pip install -q fiftyone\n",
421
+ "\n",
422
+ "!pip install -q datasets"
423
+ ],
424
+ "metadata": {
425
+ "id": "98u1eeCDTz6K"
426
+ },
427
+ "execution_count": 1,
428
+ "outputs": []
429
+ },
430
+ {
431
+ "cell_type": "code",
432
+ "source": [
433
+ "# import fiftyone\n",
434
+ "\n",
435
+ "# dataset = fiftyone.zoo.load_zoo_dataset(\"open-images-v6\", split=\"validation\", max_samples=50000)\n",
436
+ "# dataset\n",
437
+ "\n",
438
+ "from datasets import load_dataset\n",
439
+ "\n",
440
+ "dataset = load_dataset(\"cifar10\")\n",
441
+ "print(dataset.keys())\n",
442
+ "all_images = dataset[\"train\"][\"img\"] + dataset[\"test\"][\"img\"]\n",
443
+ "print(len(all_images))"
444
+ ],
445
+ "metadata": {
446
+ "id": "K2GJaYBpw91T",
447
+ "outputId": "9d6808f5-c63e-43d2-cb0d-ec3b2fae6628",
448
+ "colab": {
449
+ "base_uri": "https://localhost:8080/",
450
+ "height": 104,
451
+ "referenced_widgets": [
452
+ "b6f3481b22fa43758c6273992d49f800",
453
+ "163c4c11370d46e39fed08c38cc64965",
454
+ "90467627aa984891907e43169ebf2c64",
455
+ "c5bbd049370f4925a6ad3a548341512f",
456
+ "469c0688a66c4140b0c22402877bfc31",
457
+ "e6b62974b70349caa135ad10e461c546",
458
+ "b6886b2850104e719ec862a29a977548",
459
+ "2f72dc13ee934d0cb360b7b6fbfdbe1b",
460
+ "1c011147729f4bbfba0b6ef1455144de",
461
+ "2b071dcb5f9a4291bef5d9ff9f7053c5",
462
+ "efba12ce4437427bb45b578709e85c32"
463
+ ]
464
+ }
465
+ },
466
+ "execution_count": 2,
467
+ "outputs": [
468
+ {
469
+ "output_type": "stream",
470
+ "name": "stderr",
471
+ "text": [
472
+ "Reusing dataset cifar10 (/root/.cache/huggingface/datasets/cifar10/plain_text/1.0.0/447d6ec4733dddd1ce3bb577c7166b986eaa4c538dcd9e805ba61f35674a9de4)\n"
473
+ ]
474
+ },
475
+ {
476
+ "output_type": "display_data",
477
+ "data": {
478
+ "text/plain": [
479
+ " 0%| | 0/2 [00:00<?, ?it/s]"
480
+ ],
481
+ "application/vnd.jupyter.widget-view+json": {
482
+ "version_major": 2,
483
+ "version_minor": 0,
484
+ "model_id": "b6f3481b22fa43758c6273992d49f800"
485
+ }
486
+ },
487
+ "metadata": {}
488
+ },
489
+ {
490
+ "output_type": "stream",
491
+ "name": "stdout",
492
+ "text": [
493
+ "dict_keys(['train', 'test'])\n",
494
+ "60000\n"
495
+ ]
496
+ }
497
+ ]
498
+ },
499
+ {
500
+ "cell_type": "code",
501
+ "source": [
502
+ "from torchvision.transforms.functional import rotate\n",
503
+ "import torchvision.models as models\n",
504
+ "from torch import nn\n",
505
+ "from torchsummary import summary\n",
506
+ "\n",
507
+ "angles = [0, 45, 90, 135, 180, 225, 270, 315]\n",
508
+ "\n",
509
+ "class NeuralNetwork(nn.Module):\n",
510
+ " def __init__(self, n_labels):\n",
511
+ " super(NeuralNetwork, self).__init__()\n",
512
+ " self.backbone = models.mobilenet_v3_large()\n",
513
+ " self.flatten = nn.Flatten()\n",
514
+ " self.linear_relu_stack = nn.Sequential(\n",
515
+ " nn.Linear(1000, 512),\n",
516
+ " nn.ReLU(),\n",
517
+ " nn.Linear(512, 512),\n",
518
+ " nn.ReLU(),\n",
519
+ " nn.Linear(512, n_labels),\n",
520
+ " nn.Softmax(),\n",
521
+ " )\n",
522
+ "\n",
523
+ " def forward(self, x):\n",
524
+ " x = self.backbone(x)\n",
525
+ " x = self.flatten(x)\n",
526
+ " logits = self.linear_relu_stack(x)\n",
527
+ " return logits\n",
528
+ "\n",
529
+ "model = NeuralNetwork(len(angles))\n",
530
+ "summary(model.cuda(), (3,224,224))\n"
531
+ ],
532
+ "metadata": {
533
+ "id": "N6qubMbsUI05",
534
+ "outputId": "f7c4f07f-3d55-41be-966d-2ae3e72d45d4",
535
+ "colab": {
536
+ "base_uri": "https://localhost:8080/"
537
+ }
538
+ },
539
+ "execution_count": 3,
540
+ "outputs": [
541
+ {
542
+ "output_type": "stream",
543
+ "name": "stdout",
544
+ "text": [
545
+ "----------------------------------------------------------------\n",
546
+ " Layer (type) Output Shape Param #\n",
547
+ "================================================================\n",
548
+ " Conv2d-1 [-1, 16, 112, 112] 432\n",
549
+ " BatchNorm2d-2 [-1, 16, 112, 112] 32\n",
550
+ " Hardswish-3 [-1, 16, 112, 112] 0\n",
551
+ " Conv2d-4 [-1, 16, 112, 112] 144\n",
552
+ " BatchNorm2d-5 [-1, 16, 112, 112] 32\n",
553
+ " ReLU-6 [-1, 16, 112, 112] 0\n",
554
+ " Conv2d-7 [-1, 16, 112, 112] 256\n",
555
+ " BatchNorm2d-8 [-1, 16, 112, 112] 32\n",
556
+ " InvertedResidual-9 [-1, 16, 112, 112] 0\n",
557
+ " Conv2d-10 [-1, 64, 112, 112] 1,024\n",
558
+ " BatchNorm2d-11 [-1, 64, 112, 112] 128\n",
559
+ " ReLU-12 [-1, 64, 112, 112] 0\n",
560
+ " Conv2d-13 [-1, 64, 56, 56] 576\n",
561
+ " BatchNorm2d-14 [-1, 64, 56, 56] 128\n",
562
+ " ReLU-15 [-1, 64, 56, 56] 0\n",
563
+ " Conv2d-16 [-1, 24, 56, 56] 1,536\n",
564
+ " BatchNorm2d-17 [-1, 24, 56, 56] 48\n",
565
+ " InvertedResidual-18 [-1, 24, 56, 56] 0\n",
566
+ " Conv2d-19 [-1, 72, 56, 56] 1,728\n",
567
+ " BatchNorm2d-20 [-1, 72, 56, 56] 144\n",
568
+ " ReLU-21 [-1, 72, 56, 56] 0\n",
569
+ " Conv2d-22 [-1, 72, 56, 56] 648\n",
570
+ " BatchNorm2d-23 [-1, 72, 56, 56] 144\n",
571
+ " ReLU-24 [-1, 72, 56, 56] 0\n",
572
+ " Conv2d-25 [-1, 24, 56, 56] 1,728\n",
573
+ " BatchNorm2d-26 [-1, 24, 56, 56] 48\n",
574
+ " InvertedResidual-27 [-1, 24, 56, 56] 0\n",
575
+ " Conv2d-28 [-1, 72, 56, 56] 1,728\n",
576
+ " BatchNorm2d-29 [-1, 72, 56, 56] 144\n",
577
+ " ReLU-30 [-1, 72, 56, 56] 0\n",
578
+ " Conv2d-31 [-1, 72, 28, 28] 1,800\n",
579
+ " BatchNorm2d-32 [-1, 72, 28, 28] 144\n",
580
+ " ReLU-33 [-1, 72, 28, 28] 0\n",
581
+ "AdaptiveAvgPool2d-34 [-1, 72, 1, 1] 0\n",
582
+ " Conv2d-35 [-1, 24, 1, 1] 1,752\n",
583
+ " ReLU-36 [-1, 24, 1, 1] 0\n",
584
+ " Conv2d-37 [-1, 72, 1, 1] 1,800\n",
585
+ " Hardsigmoid-38 [-1, 72, 1, 1] 0\n",
586
+ "SqueezeExcitation-39 [-1, 72, 28, 28] 0\n",
587
+ " Conv2d-40 [-1, 40, 28, 28] 2,880\n",
588
+ " BatchNorm2d-41 [-1, 40, 28, 28] 80\n",
589
+ " InvertedResidual-42 [-1, 40, 28, 28] 0\n",
590
+ " Conv2d-43 [-1, 120, 28, 28] 4,800\n",
591
+ " BatchNorm2d-44 [-1, 120, 28, 28] 240\n",
592
+ " ReLU-45 [-1, 120, 28, 28] 0\n",
593
+ " Conv2d-46 [-1, 120, 28, 28] 3,000\n",
594
+ " BatchNorm2d-47 [-1, 120, 28, 28] 240\n",
595
+ " ReLU-48 [-1, 120, 28, 28] 0\n",
596
+ "AdaptiveAvgPool2d-49 [-1, 120, 1, 1] 0\n",
597
+ " Conv2d-50 [-1, 32, 1, 1] 3,872\n",
598
+ " ReLU-51 [-1, 32, 1, 1] 0\n",
599
+ " Conv2d-52 [-1, 120, 1, 1] 3,960\n",
600
+ " Hardsigmoid-53 [-1, 120, 1, 1] 0\n",
601
+ "SqueezeExcitation-54 [-1, 120, 28, 28] 0\n",
602
+ " Conv2d-55 [-1, 40, 28, 28] 4,800\n",
603
+ " BatchNorm2d-56 [-1, 40, 28, 28] 80\n",
604
+ " InvertedResidual-57 [-1, 40, 28, 28] 0\n",
605
+ " Conv2d-58 [-1, 120, 28, 28] 4,800\n",
606
+ " BatchNorm2d-59 [-1, 120, 28, 28] 240\n",
607
+ " ReLU-60 [-1, 120, 28, 28] 0\n",
608
+ " Conv2d-61 [-1, 120, 28, 28] 3,000\n",
609
+ " BatchNorm2d-62 [-1, 120, 28, 28] 240\n",
610
+ " ReLU-63 [-1, 120, 28, 28] 0\n",
611
+ "AdaptiveAvgPool2d-64 [-1, 120, 1, 1] 0\n",
612
+ " Conv2d-65 [-1, 32, 1, 1] 3,872\n",
613
+ " ReLU-66 [-1, 32, 1, 1] 0\n",
614
+ " Conv2d-67 [-1, 120, 1, 1] 3,960\n",
615
+ " Hardsigmoid-68 [-1, 120, 1, 1] 0\n",
616
+ "SqueezeExcitation-69 [-1, 120, 28, 28] 0\n",
617
+ " Conv2d-70 [-1, 40, 28, 28] 4,800\n",
618
+ " BatchNorm2d-71 [-1, 40, 28, 28] 80\n",
619
+ " InvertedResidual-72 [-1, 40, 28, 28] 0\n",
620
+ " Conv2d-73 [-1, 240, 28, 28] 9,600\n",
621
+ " BatchNorm2d-74 [-1, 240, 28, 28] 480\n",
622
+ " Hardswish-75 [-1, 240, 28, 28] 0\n",
623
+ " Conv2d-76 [-1, 240, 14, 14] 2,160\n",
624
+ " BatchNorm2d-77 [-1, 240, 14, 14] 480\n",
625
+ " Hardswish-78 [-1, 240, 14, 14] 0\n",
626
+ " Conv2d-79 [-1, 80, 14, 14] 19,200\n",
627
+ " BatchNorm2d-80 [-1, 80, 14, 14] 160\n",
628
+ " InvertedResidual-81 [-1, 80, 14, 14] 0\n",
629
+ " Conv2d-82 [-1, 200, 14, 14] 16,000\n",
630
+ " BatchNorm2d-83 [-1, 200, 14, 14] 400\n",
631
+ " Hardswish-84 [-1, 200, 14, 14] 0\n",
632
+ " Conv2d-85 [-1, 200, 14, 14] 1,800\n",
633
+ " BatchNorm2d-86 [-1, 200, 14, 14] 400\n",
634
+ " Hardswish-87 [-1, 200, 14, 14] 0\n",
635
+ " Conv2d-88 [-1, 80, 14, 14] 16,000\n",
636
+ " BatchNorm2d-89 [-1, 80, 14, 14] 160\n",
637
+ " InvertedResidual-90 [-1, 80, 14, 14] 0\n",
638
+ " Conv2d-91 [-1, 184, 14, 14] 14,720\n",
639
+ " BatchNorm2d-92 [-1, 184, 14, 14] 368\n",
640
+ " Hardswish-93 [-1, 184, 14, 14] 0\n",
641
+ " Conv2d-94 [-1, 184, 14, 14] 1,656\n",
642
+ " BatchNorm2d-95 [-1, 184, 14, 14] 368\n",
643
+ " Hardswish-96 [-1, 184, 14, 14] 0\n",
644
+ " Conv2d-97 [-1, 80, 14, 14] 14,720\n",
645
+ " BatchNorm2d-98 [-1, 80, 14, 14] 160\n",
646
+ " InvertedResidual-99 [-1, 80, 14, 14] 0\n",
647
+ " Conv2d-100 [-1, 184, 14, 14] 14,720\n",
648
+ " BatchNorm2d-101 [-1, 184, 14, 14] 368\n",
649
+ " Hardswish-102 [-1, 184, 14, 14] 0\n",
650
+ " Conv2d-103 [-1, 184, 14, 14] 1,656\n",
651
+ " BatchNorm2d-104 [-1, 184, 14, 14] 368\n",
652
+ " Hardswish-105 [-1, 184, 14, 14] 0\n",
653
+ " Conv2d-106 [-1, 80, 14, 14] 14,720\n",
654
+ " BatchNorm2d-107 [-1, 80, 14, 14] 160\n",
655
+ "InvertedResidual-108 [-1, 80, 14, 14] 0\n",
656
+ " Conv2d-109 [-1, 480, 14, 14] 38,400\n",
657
+ " BatchNorm2d-110 [-1, 480, 14, 14] 960\n",
658
+ " Hardswish-111 [-1, 480, 14, 14] 0\n",
659
+ " Conv2d-112 [-1, 480, 14, 14] 4,320\n",
660
+ " BatchNorm2d-113 [-1, 480, 14, 14] 960\n",
661
+ " Hardswish-114 [-1, 480, 14, 14] 0\n",
662
+ "AdaptiveAvgPool2d-115 [-1, 480, 1, 1] 0\n",
663
+ " Conv2d-116 [-1, 120, 1, 1] 57,720\n",
664
+ " ReLU-117 [-1, 120, 1, 1] 0\n",
665
+ " Conv2d-118 [-1, 480, 1, 1] 58,080\n",
666
+ " Hardsigmoid-119 [-1, 480, 1, 1] 0\n",
667
+ "SqueezeExcitation-120 [-1, 480, 14, 14] 0\n",
668
+ " Conv2d-121 [-1, 112, 14, 14] 53,760\n",
669
+ " BatchNorm2d-122 [-1, 112, 14, 14] 224\n",
670
+ "InvertedResidual-123 [-1, 112, 14, 14] 0\n",
671
+ " Conv2d-124 [-1, 672, 14, 14] 75,264\n",
672
+ " BatchNorm2d-125 [-1, 672, 14, 14] 1,344\n",
673
+ " Hardswish-126 [-1, 672, 14, 14] 0\n",
674
+ " Conv2d-127 [-1, 672, 14, 14] 6,048\n",
675
+ " BatchNorm2d-128 [-1, 672, 14, 14] 1,344\n",
676
+ " Hardswish-129 [-1, 672, 14, 14] 0\n",
677
+ "AdaptiveAvgPool2d-130 [-1, 672, 1, 1] 0\n",
678
+ " Conv2d-131 [-1, 168, 1, 1] 113,064\n",
679
+ " ReLU-132 [-1, 168, 1, 1] 0\n",
680
+ " Conv2d-133 [-1, 672, 1, 1] 113,568\n",
681
+ " Hardsigmoid-134 [-1, 672, 1, 1] 0\n",
682
+ "SqueezeExcitation-135 [-1, 672, 14, 14] 0\n",
683
+ " Conv2d-136 [-1, 112, 14, 14] 75,264\n",
684
+ " BatchNorm2d-137 [-1, 112, 14, 14] 224\n",
685
+ "InvertedResidual-138 [-1, 112, 14, 14] 0\n",
686
+ " Conv2d-139 [-1, 672, 14, 14] 75,264\n",
687
+ " BatchNorm2d-140 [-1, 672, 14, 14] 1,344\n",
688
+ " Hardswish-141 [-1, 672, 14, 14] 0\n",
689
+ " Conv2d-142 [-1, 672, 7, 7] 16,800\n",
690
+ " BatchNorm2d-143 [-1, 672, 7, 7] 1,344\n",
691
+ " Hardswish-144 [-1, 672, 7, 7] 0\n",
692
+ "AdaptiveAvgPool2d-145 [-1, 672, 1, 1] 0\n",
693
+ " Conv2d-146 [-1, 168, 1, 1] 113,064\n",
694
+ " ReLU-147 [-1, 168, 1, 1] 0\n",
695
+ " Conv2d-148 [-1, 672, 1, 1] 113,568\n",
696
+ " Hardsigmoid-149 [-1, 672, 1, 1] 0\n",
697
+ "SqueezeExcitation-150 [-1, 672, 7, 7] 0\n",
698
+ " Conv2d-151 [-1, 160, 7, 7] 107,520\n",
699
+ " BatchNorm2d-152 [-1, 160, 7, 7] 320\n",
700
+ "InvertedResidual-153 [-1, 160, 7, 7] 0\n",
701
+ " Conv2d-154 [-1, 960, 7, 7] 153,600\n",
702
+ " BatchNorm2d-155 [-1, 960, 7, 7] 1,920\n",
703
+ " Hardswish-156 [-1, 960, 7, 7] 0\n",
704
+ " Conv2d-157 [-1, 960, 7, 7] 24,000\n",
705
+ " BatchNorm2d-158 [-1, 960, 7, 7] 1,920\n",
706
+ " Hardswish-159 [-1, 960, 7, 7] 0\n",
707
+ "AdaptiveAvgPool2d-160 [-1, 960, 1, 1] 0\n",
708
+ " Conv2d-161 [-1, 240, 1, 1] 230,640\n",
709
+ " ReLU-162 [-1, 240, 1, 1] 0\n",
710
+ " Conv2d-163 [-1, 960, 1, 1] 231,360\n",
711
+ " Hardsigmoid-164 [-1, 960, 1, 1] 0\n",
712
+ "SqueezeExcitation-165 [-1, 960, 7, 7] 0\n",
713
+ " Conv2d-166 [-1, 160, 7, 7] 153,600\n",
714
+ " BatchNorm2d-167 [-1, 160, 7, 7] 320\n",
715
+ "InvertedResidual-168 [-1, 160, 7, 7] 0\n",
716
+ " Conv2d-169 [-1, 960, 7, 7] 153,600\n",
717
+ " BatchNorm2d-170 [-1, 960, 7, 7] 1,920\n",
718
+ " Hardswish-171 [-1, 960, 7, 7] 0\n",
719
+ " Conv2d-172 [-1, 960, 7, 7] 24,000\n",
720
+ " BatchNorm2d-173 [-1, 960, 7, 7] 1,920\n",
721
+ " Hardswish-174 [-1, 960, 7, 7] 0\n",
722
+ "AdaptiveAvgPool2d-175 [-1, 960, 1, 1] 0\n",
723
+ " Conv2d-176 [-1, 240, 1, 1] 230,640\n",
724
+ " ReLU-177 [-1, 240, 1, 1] 0\n",
725
+ " Conv2d-178 [-1, 960, 1, 1] 231,360\n",
726
+ " Hardsigmoid-179 [-1, 960, 1, 1] 0\n",
727
+ "SqueezeExcitation-180 [-1, 960, 7, 7] 0\n",
728
+ " Conv2d-181 [-1, 160, 7, 7] 153,600\n",
729
+ " BatchNorm2d-182 [-1, 160, 7, 7] 320\n",
730
+ "InvertedResidual-183 [-1, 160, 7, 7] 0\n",
731
+ " Conv2d-184 [-1, 960, 7, 7] 153,600\n",
732
+ " BatchNorm2d-185 [-1, 960, 7, 7] 1,920\n",
733
+ " Hardswish-186 [-1, 960, 7, 7] 0\n",
734
+ "AdaptiveAvgPool2d-187 [-1, 960, 1, 1] 0\n",
735
+ " Linear-188 [-1, 1280] 1,230,080\n",
736
+ " Hardswish-189 [-1, 1280] 0\n",
737
+ " Dropout-190 [-1, 1280] 0\n",
738
+ " Linear-191 [-1, 1000] 1,281,000\n",
739
+ " MobileNetV3-192 [-1, 1000] 0\n",
740
+ " Flatten-193 [-1, 1000] 0\n",
741
+ " Linear-194 [-1, 512] 512,512\n",
742
+ " ReLU-195 [-1, 512] 0\n",
743
+ " Linear-196 [-1, 512] 262,656\n",
744
+ " ReLU-197 [-1, 512] 0\n",
745
+ " Linear-198 [-1, 8] 4,104\n",
746
+ " Softmax-199 [-1, 8] 0\n",
747
+ "================================================================\n",
748
+ "Total params: 6,262,304\n",
749
+ "Trainable params: 6,262,304\n",
750
+ "Non-trainable params: 0\n",
751
+ "----------------------------------------------------------------\n",
752
+ "Input size (MB): 0.57\n",
753
+ "Forward/backward pass size (MB): 105.45\n",
754
+ "Params size (MB): 23.89\n",
755
+ "Estimated Total Size (MB): 129.91\n",
756
+ "----------------------------------------------------------------\n"
757
+ ]
758
+ },
759
+ {
760
+ "output_type": "stream",
761
+ "name": "stderr",
762
+ "text": [
763
+ "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py:141: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
764
+ " input = module(input)\n"
765
+ ]
766
+ }
767
+ ]
768
+ },
769
+ {
770
+ "cell_type": "code",
771
+ "source": [
772
+ "from torch.utils.data import Dataset\n",
773
+ "import torchvision.transforms.functional as TF\n",
774
+ "import torchvision.transforms as transforms\n",
775
+ "import torch.nn.functional as F\n",
776
+ "import numpy as np\n",
777
+ "import random\n",
778
+ "import torch\n",
779
+ "\n",
780
+ "class UpsideDownDataset(Dataset):\n",
781
+ " \"\"\"Face Landmarks dataset.\"\"\"\n",
782
+ "\n",
783
+ " def __init__(self, images_list, angles_list, factor=3):\n",
784
+ " \"\"\"\n",
785
+ " Args:\n",
786
+ " csv_file (string): Path to the csv file with annotations.\n",
787
+ " root_dir (string): Directory with all the images.\n",
788
+ " transform (callable, optional): Optional transform to be applied\n",
789
+ " on a sample.\n",
790
+ " \"\"\"\n",
791
+ " self.images = random.sample(images_list * factor, len(images_list * factor))\n",
792
+ " self.angles = np.random.choice(angles_list, factor*len(images_list))\n",
793
+ " assert len(self.images) == len(self.angles)\n",
794
+ " self.transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), transforms.Resize((224,224))])\n",
795
+ "\n",
796
+ " def __len__(self):\n",
797
+ " return len(self.angles)\n",
798
+ "\n",
799
+ " def __getitem__(self, idx):\n",
800
+ " if torch.is_tensor(idx):\n",
801
+ " idx = idx.tolist()\n",
802
+ " rotation_angle = int(self.angles[idx])\n",
803
+ " image = self.transform(self.images[idx])\n",
804
+ "\n",
805
+ " augmented_image = TF.rotate(image, rotation_angle)\n",
806
+ " # print(np.unique(self.angles), rotation_angle)\n",
807
+ " label = torch.zeros(len(np.unique(self.angles)))\n",
808
+ " label[np.where(np.unique(self.angles) == rotation_angle)] = 1\n",
809
+ " # F.one_hot(, num_classes=len(self.angles.unique()))\n",
810
+ " sample = (augmented_image, label)\n",
811
+ "\n",
812
+ " return sample\n",
813
+ "\n",
814
+ "dataset = UpsideDownDataset(images_list=all_images, angles_list=angles)\n",
815
+ "print(dataset.__len__())\n",
816
+ "print(dataset.__getitem__(15)[0].shape, dataset.__getitem__(15)[1])"
817
+ ],
818
+ "metadata": {
819
+ "id": "pc1CHBWqOb6I",
820
+ "outputId": "80ee4bf3-c409-457e-a3a1-317177a5e1f5",
821
+ "colab": {
822
+ "base_uri": "https://localhost:8080/"
823
+ }
824
+ },
825
+ "execution_count": 4,
826
+ "outputs": [
827
+ {
828
+ "output_type": "stream",
829
+ "name": "stdout",
830
+ "text": [
831
+ "180000\n",
832
+ "torch.Size([3, 224, 224]) tensor([0., 0., 1., 0., 0., 0., 0., 0.])\n"
833
+ ]
834
+ }
835
+ ]
836
+ },
837
+ {
838
+ "cell_type": "code",
839
+ "source": [
840
+ "import os\n",
841
+ "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
842
+ "\n",
843
+ "import torch\n",
844
+ "import torchvision\n",
845
+ "import torchvision.transforms as transforms\n",
846
+ "import torch.optim as optim\n",
847
+ "import numpy as np\n",
848
+ "\n",
849
+ "\n",
850
+ "batch_size = 32\n",
851
+ "\n",
852
+ "dataset_len = len(dataset)\n",
853
+ "train_amount, val_amount = int(0.7 * dataset_len), int(0.2 * dataset_len)\n",
854
+ "test_amount = dataset_len - (train_amount + val_amount)\n",
855
+ "train_set, val_set, test_set = torch.utils.data.random_split(dataset, [train_amount, val_amount, test_amount])\n",
856
+ "\n",
857
+ "trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,\n",
858
+ " shuffle=True, num_workers=2)\n",
859
+ "validationloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size,\n",
860
+ " shuffle=False, num_workers=2)\n",
861
+ "testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size,\n",
862
+ " shuffle=False, num_workers=2)\n",
863
+ "\n",
864
+ "\n",
865
+ "criterion = nn.CrossEntropyLoss()\n",
866
+ "optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n",
867
+ "model = model.cuda()\n",
868
+ "\n",
869
+ "EPOCHS = 10\n",
870
+ "for epoch in range(EPOCHS): # loop over the dataset multiple times\n",
871
+ "\n",
872
+ " running_loss = 0.0\n",
873
+ " for i, data in enumerate(trainloader):\n",
874
+ " # get the inputs; data is a list of [inputs, labels]\n",
875
+ " inputs, labels = data\n",
876
+ "\n",
877
+ " # zero the parameter gradients\n",
878
+ " optimizer.zero_grad()\n",
879
+ "\n",
880
+ " # forward + backward + optimize\n",
881
+ " outputs = model(inputs.cuda())\n",
882
+ " loss = criterion(outputs, labels.cuda())\n",
883
+ " loss.backward()\n",
884
+ " optimizer.step()\n",
885
+ "\n",
886
+ " # print statistics\n",
887
+ " running_loss += loss.item()\n",
888
+ " if i % 2000 == 1999: # print every 2000 mini-batches\n",
889
+ " print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')\n",
890
+ " running_loss = 0.0\n",
891
+ "\n",
892
+ "print('Finished Training')"
893
+ ],
894
+ "metadata": {
895
+ "id": "9TcAXOX2hlIv",
896
+ "outputId": "49bc6ac6-9a01-4e54-f61e-2eb07e7850d6",
897
+ "colab": {
898
+ "base_uri": "https://localhost:8080/"
899
+ }
900
+ },
901
+ "execution_count": 5,
902
+ "outputs": [
903
+ {
904
+ "output_type": "stream",
905
+ "name": "stderr",
906
+ "text": [
907
+ "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py:141: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
908
+ " input = module(input)\n"
909
+ ]
910
+ },
911
+ {
912
+ "output_type": "stream",
913
+ "name": "stdout",
914
+ "text": [
915
+ "[1, 2000] loss: 2.079\n",
916
+ "[2, 2000] loss: 2.078\n",
917
+ "[3, 2000] loss: 2.034\n",
918
+ "[4, 2000] loss: 1.952\n",
919
+ "[5, 2000] loss: 1.941\n",
920
+ "[6, 2000] loss: 1.920\n",
921
+ "[7, 2000] loss: 1.840\n",
922
+ "[8, 2000] loss: 1.782\n",
923
+ "[9, 2000] loss: 1.735\n",
924
+ "[10, 2000] loss: 1.696\n",
925
+ "Finished Training\n"
926
+ ]
927
+ }
928
+ ]
929
+ },
930
+ {
931
+ "cell_type": "code",
932
+ "source": [
933
+ "from google.colab import drive\n",
934
+ "drive.mount('/content/drive')"
935
+ ],
936
+ "metadata": {
937
+ "id": "6Ipqob3xEZKn",
938
+ "outputId": "25720ce1-e54b-4056-8193-fedf4b7bc61f",
939
+ "colab": {
940
+ "base_uri": "https://localhost:8080/"
941
+ }
942
+ },
943
+ "execution_count": 7,
944
+ "outputs": [
945
+ {
946
+ "output_type": "stream",
947
+ "name": "stdout",
948
+ "text": [
949
+ "Mounted at /content/drive\n"
950
+ ]
951
+ }
952
+ ]
953
+ },
954
+ {
955
+ "cell_type": "code",
956
+ "source": [
957
+ "torch.save(model.state_dict(), \"/content/drive/MyDrive/upside_down_model_weughts.pth\")"
958
+ ],
959
+ "metadata": {
960
+ "id": "8nhGxmFpEghe"
961
+ },
962
+ "execution_count": 8,
963
+ "outputs": []
964
+ },
965
+ {
966
+ "cell_type": "code",
967
+ "source": [
968
+ "# correct = 0\n",
969
+ "# total = 0\n",
970
+ "# # since we're not training, we don't need to calculate the gradients for our outputs\n",
971
+ "# with torch.no_grad():\n",
972
+ "# for data in testloader:\n",
973
+ "# images, labels = data\n",
974
+ "# # calculate outputs by running images through the network\n",
975
+ "# outputs = model(images.cuda())\n",
976
+ "# # the class with the highest energy is what we choose as prediction\n",
977
+ "# predicted = outputs.data.argmax(axis=1)\n",
978
+ "# # print(predicted.shape)\n",
979
+ "# # print(labels.argmax(axis=1).shape)\n",
980
+ "# total += labels.size(0)\n",
981
+ "# correct += (predicted.cpu() == labels.argmax(axis=1)).sum().item()\n",
982
+ "\n",
983
+ "# print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')\n",
984
+ "\n",
985
+ "\n",
986
+ "# prepare to count predictions for each class\n",
987
+ "correct_pred = {classname: 0 for classname in angles}\n",
988
+ "total_pred = {classname: 0 for classname in angles}\n",
989
+ "\n",
990
+ "# again no gradients needed\n",
991
+ "with torch.no_grad():\n",
992
+ " for data in testloader:\n",
993
+ " images, labels = data\n",
994
+ " outputs = model(images.cuda())\n",
995
+ " predictions = outputs.data.argmax(axis=1).cpu()\n",
996
+ " labels = labels.argmax(axis=1)\n",
997
+ " # collect the correct predictions for each class\n",
998
+ " for label, prediction in zip(labels, predictions):\n",
999
+ " if label == prediction:\n",
1000
+ " correct_pred[angles[label]] += 1\n",
1001
+ " total_pred[angles[label]] += 1\n"
1002
+ ],
1003
+ "metadata": {
1004
+ "id": "CXEbyGwdKhs-",
1005
+ "outputId": "e93e6099-c297-41b7-e03d-c7d6c0b325a2",
1006
+ "colab": {
1007
+ "base_uri": "https://localhost:8080/",
1008
+ "height": 264
1009
+ }
1010
+ },
1011
+ "execution_count": 23,
1012
+ "outputs": [
1013
+ {
1014
+ "output_type": "stream",
1015
+ "name": "stderr",
1016
+ "text": [
1017
+ "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py:141: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
1018
+ " input = module(input)\n"
1019
+ ]
1020
+ },
1021
+ {
1022
+ "output_type": "error",
1023
+ "ename": "ValueError",
1024
+ "evalue": "ignored",
1025
+ "traceback": [
1026
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1027
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
1028
+ "\u001b[0;32m<ipython-input-23-9719f7405f9f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mclassname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcorrect_count\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcorrect_pred\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m100\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcorrect_count\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mtotal_pred\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mclassname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 40\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'Accuracy for class: {classname:5s} is {accuracy:.1f} %'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1029
+ "\u001b[0;31mValueError\u001b[0m: Unknown format code 's' for object of type 'int'"
1030
+ ]
1031
+ }
1032
+ ]
1033
+ },
1034
+ {
1035
+ "cell_type": "code",
1036
+ "source": [
1037
+ "\n",
1038
+ "# print accuracy for each class\n",
1039
+ "for classname, correct_count in correct_pred.items():\n",
1040
+ " accuracy = 100 * float(correct_count) / total_pred[classname]\n",
1041
+ " print(f'Accuracy for class: {classname} is {accuracy:.1f} %')"
1042
+ ],
1043
+ "metadata": {
1044
+ "id": "H_EnvtnBQ22l",
1045
+ "outputId": "510345fb-5ecd-4ea7-8b7a-15eaf8f3c325",
1046
+ "colab": {
1047
+ "base_uri": "https://localhost:8080/"
1048
+ }
1049
+ },
1050
+ "execution_count": 24,
1051
+ "outputs": [
1052
+ {
1053
+ "output_type": "stream",
1054
+ "name": "stdout",
1055
+ "text": [
1056
+ "Accuracy for class: 0 is 52.8 %\n",
1057
+ "Accuracy for class: 45 is 78.7 %\n",
1058
+ "Accuracy for class: 90 is 49.1 %\n",
1059
+ "Accuracy for class: 135 is 72.1 %\n",
1060
+ "Accuracy for class: 180 is 65.0 %\n",
1061
+ "Accuracy for class: 225 is 0.0 %\n",
1062
+ "Accuracy for class: 270 is 57.7 %\n",
1063
+ "Accuracy for class: 315 is 98.2 %\n"
1064
+ ]
1065
+ }
1066
+ ]
1067
+ },
1068
+ {
1069
+ "cell_type": "code",
1070
+ "source": [
1071
+ "!pip install huggingface_hub"
1072
+ ],
1073
+ "metadata": {
1074
+ "id": "iC1dtDoGVe1g",
1075
+ "outputId": "e14f64e6-ba54-4f62-c8b5-f1b5d8f55c94",
1076
+ "colab": {
1077
+ "base_uri": "https://localhost:8080/"
1078
+ }
1079
+ },
1080
+ "execution_count": 25,
1081
+ "outputs": [
1082
+ {
1083
+ "output_type": "stream",
1084
+ "name": "stdout",
1085
+ "text": [
1086
+ "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.7/dist-packages (0.4.0)\n",
1087
+ "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from huggingface_hub) (4.11.3)\n",
1088
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface_hub) (3.6.0)\n",
1089
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface_hub) (3.10.0.2)\n",
1090
+ "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface_hub) (3.13)\n",
1091
+ "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from huggingface_hub) (2.23.0)\n",
1092
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from huggingface_hub) (4.63.0)\n",
1093
+ "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.7/dist-packages (from huggingface_hub) (21.3)\n",
1094
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.9->huggingface_hub) (3.0.7)\n",
1095
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->huggingface_hub) (3.7.0)\n",
1096
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->huggingface_hub) (2021.10.8)\n",
1097
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->huggingface_hub) (1.25.11)\n",
1098
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->huggingface_hub) (3.0.4)\n",
1099
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->huggingface_hub) (2.10)\n"
1100
+ ]
1101
+ }
1102
+ ]
1103
+ },
1104
+ {
1105
+ "cell_type": "markdown",
1106
+ "source": [
1107
+ "**Write up**: \n",
1108
+ "* Link to the model on Hugging Face Hub: \n",
1109
+ "* Include some examples of misclassified images. Please explain what you might do to improve your model's performance on these images in the future (you do not need to impelement these suggestions)"
1110
+ ],
1111
+ "metadata": {
1112
+ "id": "qSeLed2JxvGI"
1113
+ }
1114
+ },
1115
+ {
1116
+ "cell_type": "markdown",
1117
+ "metadata": {
1118
+ "id": "sFU9LTOyMiMj"
1119
+ },
1120
+ "source": [
1121
+ "# 2. Deep Learning for NLP\n",
1122
+ "\n",
1123
+ "**Fake news classifier**: Train a text classification model to detect fake news articles!\n",
1124
+ "\n",
1125
+ "* Download the dataset here: https://www.kaggle.com/clmentbisaillon/fake-and-real-news-dataset\n",
1126
+ "* Develop an NLP model for classification that uses a pretrained language model\n",
1127
+ "* Finetune your model on the dataset, and generate an AUC curve of your model on the test set of your choice. \n",
1128
+ "* [Upload the the model to the Hugging Face Hub](https://huggingface.co/docs/hub/adding-a-model), and add a link to your model below.\n",
1129
+ "* *Answer the following question*: Look at some of the news articles that were classified incorrectly. Please explain what you might do to improve your model's performance on these news articles in the future (you do not need to impelement these suggestions)"
1130
+ ]
1131
+ },
1132
+ {
1133
+ "cell_type": "code",
1134
+ "source": [
1135
+ "### WRITE YOUR CODE TO TRAIN THE MODEL HERE"
1136
+ ],
1137
+ "metadata": {
1138
+ "id": "E90i018KyJH3"
1139
+ },
1140
+ "execution_count": null,
1141
+ "outputs": []
1142
+ },
1143
+ {
1144
+ "cell_type": "markdown",
1145
+ "source": [
1146
+ "**Write up**: \n",
1147
+ "* Link to the model on Hugging Face Hub: \n",
1148
+ "* Include some examples of misclassified news articles. Please explain what you might do to improve your model's performance on these news articles in the future (you do not need to impelement these suggestions)"
1149
+ ],
1150
+ "metadata": {
1151
+ "id": "kpInVUMLyJ24"
1152
+ }
1153
+ },
1154
+ {
1155
+ "cell_type": "markdown",
1156
+ "metadata": {
1157
+ "id": "jTfHpo6BOmE8"
1158
+ },
1159
+ "source": [
1160
+ "# 3. Deep RL / Robotics"
1161
+ ]
1162
+ },
1163
+ {
1164
+ "cell_type": "markdown",
1165
+ "metadata": {
1166
+ "id": "saB64bbTXWgZ"
1167
+ },
1168
+ "source": [
1169
+ "**RL for Classical Control:** Using any of the [classical control](https://github.com/openai/gym/blob/master/docs/environments.md#classic-control) environments from OpenAI's `gym`, implement a deep NN that learns an optimal policy which maximizes the reward of the environment.\n",
1170
+ "\n",
1171
+ "* Describe the NN you implemented and the behavior you observe from the agent as the model converges (or diverges).\n",
1172
+ "* Plot the reward as a function of steps (or Epochs).\n",
1173
+ "Compare your results to a random agent.\n",
1174
+ "* Discuss whether you think your model has learned the optimal policy and potential methods for improving it and/or where it might fail.\n",
1175
+ "* (Optional) [Upload the the model to the Hugging Face Hub](https://huggingface.co/docs/hub/adding-a-model), and add a link to your model below.\n",
1176
+ "\n",
1177
+ "\n",
1178
+ "You may use any frameworks you like, but you must implement your NN on your own (no pre-defined/trained models like [`stable_baselines`](https://stable-baselines.readthedocs.io/en/master/)).\n",
1179
+ "\n",
1180
+ "You may use any simulator other than `gym` _however_:\n",
1181
+ "* The environment has to be similar to the classical control environments (or more complex like [`robosuite`](https://github.com/ARISE-Initiative/robosuite)).\n",
1182
+ "* You cannot choose a game/Atari/text based environment. The purpose of this challenge is to demonstrate an understanding of basic kinematic/dynamic systems."
1183
+ ]
1184
+ },
1185
+ {
1186
+ "cell_type": "code",
1187
+ "source": [
1188
+ "### WRITE YOUR CODE TO TRAIN THE MODEL HERE"
1189
+ ],
1190
+ "metadata": {
1191
+ "id": "CUhkTcoeynVv"
1192
+ },
1193
+ "execution_count": null,
1194
+ "outputs": []
1195
+ },
1196
+ {
1197
+ "cell_type": "markdown",
1198
+ "source": [
1199
+ "**Write up**: \n",
1200
+ "* (Optional) link to the model on Hugging Face Hub: \n",
1201
+ "* Discuss whether you think your model has learned the optimal policy and potential methods for improving it and/or where it might fail."
1202
+ ],
1203
+ "metadata": {
1204
+ "id": "bWllPZhJyotg"
1205
+ }
1206
+ },
1207
+ {
1208
+ "cell_type": "markdown",
1209
+ "metadata": {
1210
+ "id": "rbrRbrISa5J_"
1211
+ },
1212
+ "source": [
1213
+ "# 4. Theory / Linear Algebra "
1214
+ ]
1215
+ },
1216
+ {
1217
+ "cell_type": "markdown",
1218
+ "metadata": {
1219
+ "id": "KFkLRCzTXTzL"
1220
+ },
1221
+ "source": [
1222
+ "**Implement Contrastive PCA** Read [this paper](https://www.nature.com/articles/s41467-018-04608-8) and implement contrastive PCA in Python.\n",
1223
+ "\n",
1224
+ "* First, please discuss what kind of dataset this would make sense to use this method on\n",
1225
+ "* Implement the method in Python (do not use previous implementations of the method if they already exist)\n",
1226
+ "* Then create a synthetic dataset and apply the method to the synthetic data. Compare with standard PCA.\n"
1227
+ ]
1228
+ },
1229
+ {
1230
+ "cell_type": "markdown",
1231
+ "source": [
1232
+ "**Write up**: Discuss what kind of dataset it would make sense to use Contrastive PCA"
1233
+ ],
1234
+ "metadata": {
1235
+ "id": "TpyqWl-ly0wy"
1236
+ }
1237
+ },
1238
+ {
1239
+ "cell_type": "code",
1240
+ "source": [
1241
+ "### WRITE YOUR CODE HERE"
1242
+ ],
1243
+ "metadata": {
1244
+ "id": "1CQzUSfQywRk"
1245
+ },
1246
+ "execution_count": null,
1247
+ "outputs": []
1248
+ },
1249
+ {
1250
+ "cell_type": "markdown",
1251
+ "source": [
1252
+ "# 5. Systems"
1253
+ ],
1254
+ "metadata": {
1255
+ "id": "dlqmZS5Hy6q-"
1256
+ }
1257
+ },
1258
+ {
1259
+ "cell_type": "markdown",
1260
+ "source": [
1261
+ "**Inference on the edge**: Measure the inference times in various computationally-constrained settings\n",
1262
+ "\n",
1263
+ "* Pick a few different speech detection models (we suggest looking at models on the [Hugging Face Hub](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads))\n",
1264
+ "* Simulate different memory constraints and CPU allocations that are realistic for edge devices that might run such models, such as smart speakers or microcontrollers, and measure what is the average inference time of the models under these conditions \n",
1265
+ "* How does the inference time vary with (1) choice of model (2) available system memory (3) available CPU (4) size of input?\n",
1266
+ "\n",
1267
+ "Are there any surprising discoveries? (Note that this coding challenge is fairly open-ended, so we will be considering the amount of effort invested in discovering something interesting here)."
1268
+ ],
1269
+ "metadata": {
1270
+ "id": "QW_eiDFw1QKm"
1271
+ }
1272
+ },
1273
+ {
1274
+ "cell_type": "code",
1275
+ "source": [
1276
+ "### WRITE YOUR CODE HERE"
1277
+ ],
1278
+ "metadata": {
1279
+ "id": "OYp94wLP1kWJ"
1280
+ },
1281
+ "execution_count": null,
1282
+ "outputs": []
1283
+ },
1284
+ {
1285
+ "cell_type": "markdown",
1286
+ "source": [
1287
+ "**Write up**: What surprising discoveries do you see?"
1288
+ ],
1289
+ "metadata": {
1290
+ "id": "yoHmutWx2jer"
1291
+ }
1292
+ }
1293
+ ]
1294
+ }
upside_down_model_weights.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6b74c2546cee5e4dac87ddbc1c38af24de484741a26f0b08caa2d9b228a178
3
+ size 25260869