Upload Coding Excercise.ipynb
Browse files- Coding Excercise.ipynb +236 -0
Coding Excercise.ipynb
ADDED
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"id": "3d8c593f",
|
7 |
+
"metadata": {
|
8 |
+
"ExecuteTime": {
|
9 |
+
"end_time": "2023-03-26T07:31:34.213141Z",
|
10 |
+
"start_time": "2023-03-26T07:31:14.082603Z"
|
11 |
+
},
|
12 |
+
"scrolled": true
|
13 |
+
},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"!pip install huggingface_hub\n",
|
17 |
+
"!pip install datasets\n",
|
18 |
+
"!pip install keras"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": null,
|
24 |
+
"id": "bca92d1d",
|
25 |
+
"metadata": {
|
26 |
+
"ExecuteTime": {
|
27 |
+
"end_time": "2023-03-26T14:03:42.287776Z",
|
28 |
+
"start_time": "2023-03-26T14:03:39.989670Z"
|
29 |
+
}
|
30 |
+
},
|
31 |
+
"outputs": [],
|
32 |
+
"source": [
|
33 |
+
"from huggingface_hub import notebook_login\n",
|
34 |
+
"from datasets import load_dataset\n",
|
35 |
+
"import pandas as pd\n",
|
36 |
+
"from datasets import load_dataset\n",
|
37 |
+
"import tensorflow as tf\n",
|
38 |
+
"from tensorflow.keras.applications.vgg16 import VGG16\n",
|
39 |
+
"from tensorflow.keras.models import Model\n",
|
40 |
+
"from tensorflow.keras.layers import Dense, GlobalAveragePooling2D\n",
|
41 |
+
"from tensorflow.keras.optimizers import Adam\n",
|
42 |
+
"from tensorflow.keras.utils import to_categorical\n",
|
43 |
+
"from PIL import Image\n",
|
44 |
+
"import numpy as np"
|
45 |
+
]
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"cell_type": "code",
|
49 |
+
"execution_count": null,
|
50 |
+
"id": "62254f94",
|
51 |
+
"metadata": {
|
52 |
+
"ExecuteTime": {
|
53 |
+
"end_time": "2023-03-26T14:03:42.317000Z",
|
54 |
+
"start_time": "2023-03-26T14:03:42.289947Z"
|
55 |
+
}
|
56 |
+
},
|
57 |
+
"outputs": [],
|
58 |
+
"source": [
|
59 |
+
"notebook_login()"
|
60 |
+
]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"cell_type": "code",
|
64 |
+
"execution_count": null,
|
65 |
+
"id": "57308b59",
|
66 |
+
"metadata": {
|
67 |
+
"ExecuteTime": {
|
68 |
+
"end_time": "2023-03-26T14:03:52.591875Z",
|
69 |
+
"start_time": "2023-03-26T14:03:48.476822Z"
|
70 |
+
}
|
71 |
+
},
|
72 |
+
"outputs": [],
|
73 |
+
"source": [
|
74 |
+
"# Load the dataset from the Hugging Face Hub\n",
|
75 |
+
"# Hold out 15% of the train split for validation (no separate test set is created here)\n",
|
76 |
+
"train_ds, val_ds = load_dataset('competitions/aiornot', split=\"train\").train_test_split(test_size=0.15).values()"
|
77 |
+
]
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"cell_type": "code",
|
81 |
+
"execution_count": null,
|
82 |
+
"id": "b83b1536",
|
83 |
+
"metadata": {
|
84 |
+
"ExecuteTime": {
|
85 |
+
"end_time": "2023-03-26T14:04:10.210069Z",
|
86 |
+
"start_time": "2023-03-26T14:03:53.833533Z"
|
87 |
+
}
|
88 |
+
},
|
89 |
+
"outputs": [],
|
90 |
+
"source": [
|
91 |
+
"data_sz = 1000\n",
|
92 |
+
"X_train = train_ds[:data_sz]['image']\n",
|
93 |
+
"X_val = val_ds[:data_sz]['image']\n",
|
94 |
+
"Y_train = to_categorical(train_ds[:data_sz]['label'])\n",
|
95 |
+
"Y_val = to_categorical(val_ds[:data_sz]['label'])\n",
|
96 |
+
"# Convert the input data to a NumPy array\n",
|
97 |
+
"X_train = np.stack([np.array(image) for image in X_train])\n",
|
98 |
+
"X_val = np.stack([np.array(image) for image in X_val])"
|
99 |
+
]
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"cell_type": "code",
|
103 |
+
"execution_count": null,
|
104 |
+
"id": "72df9419",
|
105 |
+
"metadata": {
|
106 |
+
"ExecuteTime": {
|
107 |
+
"start_time": "2023-03-26T14:04:33.658Z"
|
108 |
+
}
|
109 |
+
},
|
110 |
+
"outputs": [],
|
111 |
+
"source": [
|
112 |
+
"with tf.device('/device:GPU:3'):\n",
|
113 |
+
" # Load the VGG16 model pre-trained on ImageNet\n",
|
114 |
+
" base_model = VGG16(weights='imagenet', include_top=False)\n",
|
115 |
+
"\n",
|
116 |
+
" # Add a global spatial average pooling layer\n",
|
117 |
+
" x = base_model.output\n",
|
118 |
+
" x = GlobalAveragePooling2D()(x)\n",
|
119 |
+
"\n",
|
120 |
+
" # Add a fully-connected layer\n",
|
121 |
+
" x = Dense(1024, activation='relu')(x)\n",
|
122 |
+
"\n",
|
123 |
+
" # Add a softmax output layer with one unit per target class\n",
|
124 |
+
" num_classes = 2\n",
|
125 |
+
" predictions = Dense(num_classes, activation='softmax')(x)\n",
|
126 |
+
"\n",
|
127 |
+
" # Create the final model\n",
|
128 |
+
" model = Model(inputs=base_model.input, outputs=predictions)\n",
|
129 |
+
"\n",
|
130 |
+
" # Freeze all layers in the base VGG16 model\n",
|
131 |
+
" for layer in base_model.layers:\n",
|
132 |
+
" layer.trainable = False\n",
|
133 |
+
"\n",
|
134 |
+
" # Compile the model\n",
|
135 |
+
" model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])\n",
|
136 |
+
"\n",
|
137 |
+
"# Train the model on your new dataset\n",
|
138 |
+
"model.fit(X_train, Y_train, epochs=10, validation_data=(X_val, Y_val))"
|
139 |
+
]
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"cell_type": "code",
|
143 |
+
"execution_count": null,
|
144 |
+
"id": "bbf079b7",
|
145 |
+
"metadata": {
|
146 |
+
"ExecuteTime": {
|
147 |
+
"start_time": "2023-03-26T14:05:03.786Z"
|
148 |
+
}
|
149 |
+
},
|
150 |
+
"outputs": [],
|
151 |
+
"source": [
|
152 |
+
"# Generate predictions for the data\n",
|
153 |
+
"y_pred = model.predict(X_val)\n",
|
154 |
+
"# Convert predictions and true labels to class indices\n",
|
155 |
+
"y_pred_classes = y_pred.argmax(axis=1)\n",
|
156 |
+
"y_true_classes = Y_val.argmax(axis=1)\n",
|
157 |
+
"# Find the indices of the misclassified samples\n",
|
158 |
+
"misclassified_indices = np.where(y_pred_classes != y_true_classes)[0]\n",
|
159 |
+
"\n",
|
160 |
+
"# Get the misclassified samples\n",
|
161 |
+
"# x_misclassified = X_val[misclassified_indices]\n",
|
162 |
+
"# y_misclassified_true = Y_val[misclassified_indices]\n",
|
163 |
+
"# y_misclassified_pred = y_pred[misclassified_indices]"
|
164 |
+
]
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"cell_type": "code",
|
168 |
+
"execution_count": null,
|
169 |
+
"id": "1e639f6b",
|
170 |
+
"metadata": {
|
171 |
+
"ExecuteTime": {
|
172 |
+
"start_time": "2023-03-26T14:05:06.090Z"
|
173 |
+
}
|
174 |
+
},
|
175 |
+
"outputs": [],
|
176 |
+
"source": [
|
177 |
+
"# A helper function to inspect a misclassified sample: prints the true label and the prediction, then shows the image\n",
|
178 |
+
"def checkMiss(idx):\n",
|
179 |
+
" print(\"\\ncorrect:\", Y_val[idx])\n",
|
180 |
+
" print(\"miss:\", y_pred[idx])\n",
|
181 |
+
" img = Image.fromarray(X_val[idx])\n",
|
182 |
+
" img.show()"
|
183 |
+
]
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"cell_type": "code",
|
187 |
+
"execution_count": null,
|
188 |
+
"id": "951ff24e",
|
189 |
+
"metadata": {
|
190 |
+
"ExecuteTime": {
|
191 |
+
"start_time": "2023-03-26T14:05:07.650Z"
|
192 |
+
}
|
193 |
+
},
|
194 |
+
"outputs": [],
|
195 |
+
"source": [
|
196 |
+
"# View the first 10 misclassified samples to see what could be improved\n",
|
197 |
+
"for i in range(10):\n",
|
198 |
+
" checkMiss(misclassified_indices[i])"
|
199 |
+
]
|
200 |
+
}
|
201 |
+
],
|
202 |
+
"metadata": {
|
203 |
+
"kernelspec": {
|
204 |
+
"display_name": "Python 3",
|
205 |
+
"language": "python",
|
206 |
+
"name": "python3"
|
207 |
+
},
|
208 |
+
"language_info": {
|
209 |
+
"codemirror_mode": {
|
210 |
+
"name": "ipython",
|
211 |
+
"version": 3
|
212 |
+
},
|
213 |
+
"file_extension": ".py",
|
214 |
+
"mimetype": "text/x-python",
|
215 |
+
"name": "python",
|
216 |
+
"nbconvert_exporter": "python",
|
217 |
+
"pygments_lexer": "ipython3",
|
218 |
+
"version": "3.8.10"
|
219 |
+
},
|
220 |
+
"toc": {
|
221 |
+
"base_numbering": 1,
|
222 |
+
"nav_menu": {},
|
223 |
+
"number_sections": false,
|
224 |
+
"sideBar": true,
|
225 |
+
"skip_h1_title": false,
|
226 |
+
"title_cell": "Table of Contents",
|
227 |
+
"title_sidebar": "Contents",
|
228 |
+
"toc_cell": false,
|
229 |
+
"toc_position": {},
|
230 |
+
"toc_section_display": true,
|
231 |
+
"toc_window_display": false
|
232 |
+
}
|
233 |
+
},
|
234 |
+
"nbformat": 4,
|
235 |
+
"nbformat_minor": 5
|
236 |
+
}
|