panderior
/

aiornot

Image Classification

English

Model card Files Files and versions Community

panderior committed on Mar 26, 2023

Commit

d23fad5

1 Parent(s): e481545

Upload Coding Excercise.ipynb

Browse files

Files changed (1) hide show

Coding Excercise.ipynb +236 -0

Coding Excercise.ipynb ADDED Viewed

	@@ -0,0 +1,236 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d8c593f",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-03-26T07:31:34.213141Z",
+     "start_time": "2023-03-26T07:31:14.082603Z"
+    },
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# %pip (rather than !pip) installs into the environment of the running kernel\n",
+    "%pip install huggingface_hub\n",
+    "%pip install datasets\n",
+    "%pip install keras"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bca92d1d",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-03-26T14:03:42.287776Z",
+     "start_time": "2023-03-26T14:03:39.989670Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from huggingface_hub import notebook_login\n",
+    "from datasets import load_dataset\n",
+    "import pandas as pd\n",
+    "from datasets import load_dataset\n",
+    "import tensorflow as tf\n",
+    "from tensorflow.keras.applications.vgg16 import VGG16\n",
+    "from tensorflow.keras.models import Model\n",
+    "from tensorflow.keras.layers import Dense, GlobalAveragePooling2D\n",
+    "from tensorflow.keras.optimizers import Adam\n",
+    "from tensorflow.keras.utils import to_categorical\n",
+    "from PIL import Image\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62254f94",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-03-26T14:03:42.317000Z",
+     "start_time": "2023-03-26T14:03:42.289947Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Log in to the Hugging Face Hub from inside the notebook\n",
+    "# (presumably needed to download the dataset used by this notebook -- TODO confirm)\n",
+    "notebook_login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "57308b59",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-03-26T14:03:52.591875Z",
+     "start_time": "2023-03-26T14:03:48.476822Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Load the competition's 'train' split from the Hugging Face Hub and hold out 15% of it for validation.\n",
+    "# No separate test set is created here.\n",
+    "# A fixed seed makes the shuffle, and therefore the split, identical on every run.\n",
+    "splits = load_dataset('competitions/aiornot', split=\"train\").train_test_split(test_size=0.15, seed=42)\n",
+    "# Pick the two parts by key rather than relying on the order of .values()\n",
+    "train_ds, val_ds = splits['train'], splits['test']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b83b1536",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-03-26T14:04:10.210069Z",
+     "start_time": "2023-03-26T14:03:53.833533Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Number of samples taken from the front of each split\n",
+    "data_sz = 1000\n",
+    "# Slice each split once and reuse the result, instead of slicing separately for 'image' and for 'label'\n",
+    "train_batch = train_ds[:data_sz]\n",
+    "val_batch = val_ds[:data_sz]\n",
+    "# Convert the images to NumPy arrays and stack them into one array per split\n",
+    "# (np.stack requires every image in a split to have the same shape)\n",
+    "X_train = np.stack([np.array(image) for image in train_batch['image']])\n",
+    "X_val = np.stack([np.array(image) for image in val_batch['image']])\n",
+    "# One-hot encode the labels. num_classes is fixed at 2 (matching the 2-unit softmax head of the model)\n",
+    "# so the label width does not depend on which labels happen to appear in the subset.\n",
+    "Y_train = to_categorical(train_batch['label'], num_classes=2)\n",
+    "Y_val = to_categorical(val_batch['label'], num_classes=2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72df9419",
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2023-03-26T14:04:33.658Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Prefer the GPU this notebook was written for (index 3); otherwise fall back to the first GPU, or to the CPU.\n",
+    "# Explicitly requesting a device that does not exist can raise an error, so the name is chosen from what is available.\n",
+    "gpus = tf.config.list_logical_devices('GPU')\n",
+    "if len(gpus) > 3:\n",
+    "    device_name = '/device:GPU:3'\n",
+    "elif gpus:\n",
+    "    device_name = '/device:GPU:0'\n",
+    "else:\n",
+    "    device_name = '/device:CPU:0'\n",
+    "\n",
+    "with tf.device(device_name):\n",
+    "    # Load the VGG16 convolutional base pre-trained on ImageNet (without its classifier head)\n",
+    "    # NOTE(review): the images are fed to the network as raw pixel values; the ImageNet weights are normally\n",
+    "    # used together with tensorflow.keras.applications.vgg16.preprocess_input -- consider applying it.\n",
+    "    base_model = VGG16(weights='imagenet', include_top=False)\n",
+    "\n",
+    "    # Add a global spatial average pooling layer\n",
+    "    x = base_model.output\n",
+    "    x = GlobalAveragePooling2D()(x)\n",
+    "\n",
+    "    # Add a fully-connected layer\n",
+    "    x = Dense(1024, activation='relu')(x)\n",
+    "\n",
+    "    # Add a softmax output layer with one unit per class of the target variable\n",
+    "    num_classes = 2\n",
+    "    predictions = Dense(num_classes, activation='softmax')(x)\n",
+    "\n",
+    "    # Create the final model\n",
+    "    model = Model(inputs=base_model.input, outputs=predictions)\n",
+    "\n",
+    "    # Freeze all layers in the base VGG16 model so only the newly added layers are trained\n",
+    "    for layer in base_model.layers:\n",
+    "        layer.trainable = False\n",
+    "\n",
+    "    # Compile the model; 'learning_rate' replaces the deprecated 'lr' argument name\n",
+    "    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])\n",
+    "\n",
+    "# Train the model on the prepared arrays, evaluating on the validation arrays after each epoch\n",
+    "model.fit(X_train, Y_train, epochs=10, validation_data=(X_val, Y_val))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bbf079b7",
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2023-03-26T14:05:03.786Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Run the model over the validation images\n",
+    "y_pred = model.predict(X_val)\n",
+    "# Reduce the model outputs and the one-hot labels to integer class ids\n",
+    "y_pred_classes = np.argmax(y_pred, axis=1)\n",
+    "y_true_classes = np.argmax(Y_val, axis=1)\n",
+    "# Positions in the validation set where the predicted class differs from the true class\n",
+    "misclassified_indices = np.flatnonzero(y_pred_classes != y_true_classes)\n",
+    "\n",
+    "# Disabled: gather the misclassified samples themselves\n",
+    "# x_misclassified = X_val[misclassified_indices]\n",
+    "# y_misclassified_true = Y_val[misclassified_indices]\n",
+    "# y_misclassified_pred = y_pred[misclassified_indices]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1e639f6b",
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2023-03-26T14:05:06.090Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def checkMiss(idx):\n",
+    "    \"\"\"Show one validation sample next to what the model predicted for it.\n",
+    "\n",
+    "    Prints the label row ``Y_val[idx]`` and the model output ``y_pred[idx]``,\n",
+    "    then opens the image ``X_val[idx]`` with PIL's ``Image.show()``.\n",
+    "\n",
+    "    idx: position of the sample within the validation arrays.\n",
+    "    \"\"\"\n",
+    "    print(\"\\ncorrect:\", Y_val[idx])\n",
+    "    print(\"miss:\", y_pred[idx])\n",
+    "    # NOTE(review): Image.show() passes the picture to an external viewer; it may not render inline in the notebook\n",
+    "    Image.fromarray(X_val[idx]).show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "951ff24e",
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2023-03-26T14:05:07.650Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# View up to 10 misclassified samples to see what could be improved.\n",
+    "# Slicing (instead of indexing 0..9) avoids an IndexError when fewer than 10 samples were misclassified.\n",
+    "for miss_idx in misclassified_indices[:10]:\n",
+    "    checkMiss(miss_idx)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": false,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}