panderior committed on
Commit
d23fad5
1 Parent(s): e481545

Upload Coding Excercise.ipynb

Browse files
Files changed (1) hide show
  1. Coding Excercise.ipynb +236 -0
Coding Excercise.ipynb ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "3d8c593f",
7
+ "metadata": {
8
+ "ExecuteTime": {
9
+ "end_time": "2023-03-26T07:31:34.213141Z",
10
+ "start_time": "2023-03-26T07:31:14.082603Z"
11
+ },
12
+ "scrolled": true
13
+ },
14
+ "outputs": [],
15
+ "source": [
16
+ "!pip install huggingface_hub\n",
17
+ "!pip install datasets\n",
18
+ "!pip install keras"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "id": "bca92d1d",
25
+ "metadata": {
26
+ "ExecuteTime": {
27
+ "end_time": "2023-03-26T14:03:42.287776Z",
28
+ "start_time": "2023-03-26T14:03:39.989670Z"
29
+ }
30
+ },
31
+ "outputs": [],
32
+ "source": [
33
+ "from huggingface_hub import notebook_login\n",
34
+ "from datasets import load_dataset\n",
35
+ "import pandas as pd\n",
36
+ "from datasets import load_dataset\n",
37
+ "import tensorflow as tf\n",
38
+ "from tensorflow.keras.applications.vgg16 import VGG16\n",
39
+ "from tensorflow.keras.models import Model\n",
40
+ "from tensorflow.keras.layers import Dense, GlobalAveragePooling2D\n",
41
+ "from tensorflow.keras.optimizers import Adam\n",
42
+ "from tensorflow.keras.utils import to_categorical\n",
43
+ "from PIL import Image\n",
44
+ "import numpy as np"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": null,
50
+ "id": "62254f94",
51
+ "metadata": {
52
+ "ExecuteTime": {
53
+ "end_time": "2023-03-26T14:03:42.317000Z",
54
+ "start_time": "2023-03-26T14:03:42.289947Z"
55
+ }
56
+ },
57
+ "outputs": [],
58
+ "source": [
59
+ "notebook_login()"
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "code",
64
+ "execution_count": null,
65
+ "id": "57308b59",
66
+ "metadata": {
67
+ "ExecuteTime": {
68
+ "end_time": "2023-03-26T14:03:52.591875Z",
69
+ "start_time": "2023-03-26T14:03:48.476822Z"
70
+ }
71
+ },
72
+ "outputs": [],
73
+ "source": [
74
+ "# load dataset from hugging face\n",
75
+ "# split it into training (85%) and validation (15%) subsets\n",
76
+ "train_ds, val_ds = load_dataset('competitions/aiornot', split=\"train\").train_test_split(test_size=0.15).values()"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "id": "b83b1536",
83
+ "metadata": {
84
+ "ExecuteTime": {
85
+ "end_time": "2023-03-26T14:04:10.210069Z",
86
+ "start_time": "2023-03-26T14:03:53.833533Z"
87
+ }
88
+ },
89
+ "outputs": [],
90
+ "source": [
91
+ "data_sz = 1000\n",
92
+ "X_train = train_ds[:data_sz]['image']\n",
93
+ "X_val = val_ds[:data_sz]['image']\n",
94
+ "Y_train = to_categorical(train_ds[:data_sz]['label'])\n",
95
+ "Y_val = to_categorical(val_ds[:data_sz]['label'])\n",
96
+ "# Convert the input data to a NumPy array\n",
97
+ "X_train = np.stack([np.array(image) for image in X_train])\n",
98
+ "X_val = np.stack([np.array(image) for image in X_val])"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": null,
104
+ "id": "72df9419",
105
+ "metadata": {
106
+ "ExecuteTime": {
107
+ "start_time": "2023-03-26T14:04:33.658Z"
108
+ }
109
+ },
110
+ "outputs": [],
111
+ "source": [
112
+ "with tf.device('/device:GPU:3'):\n",
113
+ " # Load the VGG16 model pre-trained on ImageNet\n",
114
+ " base_model = VGG16(weights='imagenet', include_top=False)\n",
115
+ "\n",
116
+ " # Add a global spatial average pooling layer\n",
117
+ " x = base_model.output\n",
118
+ " x = GlobalAveragePooling2D()(x)\n",
119
+ "\n",
120
+ " # Add a fully-connected layer\n",
121
+ " x = Dense(1024, activation='relu')(x)\n",
122
+ "\n",
123
+ " # Add a softmax output layer with one unit per target class\n",
124
+ " num_classes = 2\n",
125
+ " predictions = Dense(num_classes, activation='softmax')(x)\n",
126
+ "\n",
127
+ " # Create the final model\n",
128
+ " model = Model(inputs=base_model.input, outputs=predictions)\n",
129
+ "\n",
130
+ " # Freeze all layers in the base VGG16 model\n",
131
+ " for layer in base_model.layers:\n",
132
+ " layer.trainable = False\n",
133
+ "\n",
134
+ " # Compile the model\n",
135
+ " model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])\n",
136
+ "\n",
137
+ "# Train the model on your new dataset\n",
138
+ "model.fit(X_train, Y_train, epochs=10, validation_data=(X_val, Y_val))"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": null,
144
+ "id": "bbf079b7",
145
+ "metadata": {
146
+ "ExecuteTime": {
147
+ "start_time": "2023-03-26T14:05:03.786Z"
148
+ }
149
+ },
150
+ "outputs": [],
151
+ "source": [
152
+ "# Generate predictions for the data\n",
153
+ "y_pred = model.predict(X_val)\n",
154
+ "# Convert predictions and true labels to class indices\n",
155
+ "y_pred_classes = y_pred.argmax(axis=1)\n",
156
+ "y_true_classes = Y_val.argmax(axis=1)\n",
157
+ "# Find the indices of the misclassified samples\n",
158
+ "misclassified_indices = np.where(y_pred_classes != y_true_classes)[0]\n",
159
+ "\n",
160
+ "# Get the misclassified samples\n",
161
+ "# x_misclassified = X_val[misclassified_indices]\n",
162
+ "# y_misclassified_true = Y_val[misclassified_indices]\n",
163
+ "# y_misclassified_pred = y_pred[misclassified_indices]"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": null,
169
+ "id": "1e639f6b",
170
+ "metadata": {
171
+ "ExecuteTime": {
172
+ "start_time": "2023-03-26T14:05:06.090Z"
173
+ }
174
+ },
175
+ "outputs": [],
176
+ "source": [
177
+ "# a helper function to view misclassified data with the image and prediction\n",
178
+ "def checkMiss(idx):\n",
179
+ " print(\"\\ncorrect:\", Y_val[idx])\n",
180
+ " print(\"miss:\", y_pred[idx])\n",
181
+ " img = Image.fromarray(X_val[idx])\n",
182
+ " img.show()"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": null,
188
+ "id": "951ff24e",
189
+ "metadata": {
190
+ "ExecuteTime": {
191
+ "start_time": "2023-03-26T14:05:07.650Z"
192
+ }
193
+ },
194
+ "outputs": [],
195
+ "source": [
196
+ "# view 10 misclassified samples to see what could be improved\n",
197
+ "for i in range(10):\n",
198
+ " checkMiss(misclassified_indices[i])"
199
+ ]
200
+ }
201
+ ],
202
+ "metadata": {
203
+ "kernelspec": {
204
+ "display_name": "Python 3",
205
+ "language": "python",
206
+ "name": "python3"
207
+ },
208
+ "language_info": {
209
+ "codemirror_mode": {
210
+ "name": "ipython",
211
+ "version": 3
212
+ },
213
+ "file_extension": ".py",
214
+ "mimetype": "text/x-python",
215
+ "name": "python",
216
+ "nbconvert_exporter": "python",
217
+ "pygments_lexer": "ipython3",
218
+ "version": "3.8.10"
219
+ },
220
+ "toc": {
221
+ "base_numbering": 1,
222
+ "nav_menu": {},
223
+ "number_sections": false,
224
+ "sideBar": true,
225
+ "skip_h1_title": false,
226
+ "title_cell": "Table of Contents",
227
+ "title_sidebar": "Contents",
228
+ "toc_cell": false,
229
+ "toc_position": {},
230
+ "toc_section_display": true,
231
+ "toc_window_display": false
232
+ }
233
+ },
234
+ "nbformat": 4,
235
+ "nbformat_minor": 5
236
+ }