yasirfaizahmed
/

mnist-digit-image-classification

@@ -2,29 +2,176 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 14,
    "id": "b2d6f096-6123-4dd0-ae4f-c9abf70889c4",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-04-03T17:21:13.044450Z",
-     "iopub.status.busy": "2024-04-03T17:21:13.043912Z",
-     "iopub.status.idle": "2024-04-03T17:21:13.374087Z",
-     "shell.execute_reply": "2024-04-03T17:21:13.373277Z",
-     "shell.execute_reply.started": "2024-04-03T17:21:13.044419Z"
     }
    },
    "outputs": [
     {
-     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Reusing dataset mnist (/root/.cache/huggingface/datasets/mnist/mnist/1.0.0/fda16c03c4ecfb13f165ba7e29cf38129ce035011519968cdaf74894ce91c9d4)\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cd2dd3c8b22b49718b3c66585ca3559c",
        "version_major": 2,
        "version_minor": 0
       },
@@ -44,15 +191,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
    "id": "7c5ea43d-6a70-4deb-a90e-1d7758c961a3",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-04-03T17:21:27.628082Z",
-     "iopub.status.busy": "2024-04-03T17:21:27.627048Z",
-     "iopub.status.idle": "2024-04-03T17:21:27.632687Z",
-     "shell.execute_reply": "2024-04-03T17:21:27.632002Z",
-     "shell.execute_reply.started": "2024-04-03T17:21:27.628051Z"
     }
    },
    "outputs": [],
@@ -60,40 +207,181 @@
     "import numpy as np\n",
     "from PIL import Image\n",
     "import io\n",
     "\n",
-    "def convert(byte_like_str_image):\n",
-    "    return np.asarray(Image.open(io.BytesIO(byte_like_str_image['bytes'])))\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
    "id": "daafde17-8100-4f49-b27e-8aad43b129c6",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2024-04-03T17:22:20.343090Z",
-     "iopub.status.busy": "2024-04-03T17:22:20.341876Z",
-     "iopub.status.idle": "2024-04-03T17:22:27.888815Z",
-     "shell.execute_reply": "2024-04-03T17:22:27.887812Z",
-     "shell.execute_reply.started": "2024-04-03T17:22:20.343047Z"
     }
    },
    "outputs": [],
    "source": [
     "dataset_train = dataset['train'].to_pandas()\n",
-    "dataset_train['image'] = dataset_train['image'].map(convert)\n",
     "\n",
     "dataset_test = dataset['test'].to_pandas()\n",
-    "dataset_test['image'] = dataset_test['image'].map(convert)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "72022fd2-000d-4d5c-88d5-9afc62c283d5",
-   "metadata": {},
    "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "id": "b2d6f096-6123-4dd0-ae4f-c9abf70889c4",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2024-04-04T08:47:31.680497Z",
+     "iopub.status.busy": "2024-04-04T08:47:31.678982Z",
+     "iopub.status.idle": "2024-04-04T08:47:52.012034Z",
+     "shell.execute_reply": "2024-04-04T08:47:52.010508Z",
+     "shell.execute_reply.started": "2024-04-04T08:47:31.680435Z"
     }
    },
    "outputs": [
     {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f0c8978e9991468cb37176b7ee5e7f40",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading builder script:   0%|          | 0.00/1.63k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1821c061525d4be1b088a0b1624ba0f7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading metadata:   0%|          | 0.00/1.01k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Downloading and preparing dataset mnist/mnist (download: 11.06 MiB, generated: 19.44 MiB, post-processed: Unknown size, total: 30.50 MiB) to /root/.cache/huggingface/datasets/mnist/mnist/1.0.0/fda16c03c4ecfb13f165ba7e29cf38129ce035011519968cdaf74894ce91c9d4...\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "eb3da6f2299d4012862a28dd3a74565d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files:   0%|          | 0/4 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e53657e2c6ad4c108ad703dae1f87efe",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data:   0%|          | 0.00/9.91M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2407ea55dc5348619ad666a16a3b97ff",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data:   0%|          | 0.00/28.9k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "04a14bdbb03044979afd7c44ba0f424e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data:   0%|          | 0.00/1.65M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "976dba4172974f01ae37ad90b1c6cb1d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data:   0%|          | 0.00/4.54k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "56d60d708f2549a5a0c6acc0e2499884",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Extracting data files:   0%|          | 0/4 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "87ba67ec7d834407afd7fcf7a9d6ac13",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating train split:   0%|          | 0/60000 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "696c5019b4c24f049ce78744c1fecded",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset mnist downloaded and prepared to /root/.cache/huggingface/datasets/mnist/mnist/1.0.0/fda16c03c4ecfb13f165ba7e29cf38129ce035011519968cdaf74894ce91c9d4. Subsequent calls will reuse this data.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d5193a728c974100b02fcb82ac236d80",
        "version_major": 2,
        "version_minor": 0
       },
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "7c5ea43d-6a70-4deb-a90e-1d7758c961a3",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2024-04-04T08:47:52.015528Z",
+     "iopub.status.busy": "2024-04-04T08:47:52.014998Z",
+     "iopub.status.idle": "2024-04-04T08:47:56.923333Z",
+     "shell.execute_reply": "2024-04-04T08:47:56.922341Z",
+     "shell.execute_reply.started": "2024-04-04T08:47:52.015483Z"
     }
    },
    "outputs": [],
     "import numpy as np\n",
     "from PIL import Image\n",
     "import io\n",
+    "import tensorflow as tf\n",
     "\n",
+    "def convert_image(byte_like_str_image):\n",
+    "    \"\"\"Decode one encoded image record into a float32 tensor.\n",
+    "\n",
+    "    Args:\n",
+    "        byte_like_str_image: mapping whose 'bytes' entry holds the encoded image data.\n",
+    "\n",
+    "    Returns:\n",
+    "        The decoded pixel data as a tf.float32 tensor.\n",
+    "    \"\"\"\n",
+    "    encoded_stream = io.BytesIO(byte_like_str_image['bytes'])\n",
+    "    pixel_array = np.asarray(Image.open(encoded_stream))\n",
+    "    return tf.convert_to_tensor(pixel_array, dtype=tf.float32)"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "daafde17-8100-4f49-b27e-8aad43b129c6",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2024-04-04T08:47:56.925474Z",
+     "iopub.status.busy": "2024-04-04T08:47:56.924961Z",
+     "iopub.status.idle": "2024-04-04T08:48:32.236745Z",
+     "shell.execute_reply": "2024-04-04T08:48:32.234178Z",
+     "shell.execute_reply.started": "2024-04-04T08:47:56.925446Z"
     }
    },
    "outputs": [],
    "source": [
+    "from keras.utils import to_categorical\n",
+    "\n",
     "dataset_train = dataset['train'].to_pandas()\n",
+    "dataset_train['image'] = dataset_train['image'].map(convert_image)\n",
     "\n",
     "dataset_test = dataset['test'].to_pandas()\n",
+    "dataset_test['image'] = dataset_test['image'].map(convert_image)\n",
+    "\n",
+    "# Convert labels to NumPy arrays\n",
+    "X_train = np.array(dataset_train['image'].tolist())\n",
+    "y_train = np.array(dataset_train['label'])\n",
+    "\n",
+    "X_test = np.array(dataset_test['image'].tolist())\n",
+    "y_test = np.array(dataset_test['label'])\n",
+    "# dataset_train['label'] = dataset_train['label'].astype('float32')\n",
+    "# dataset_test['label'] = dataset_test['label'].astype('float32')\n",
+    "\n"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "id": "72022fd2-000d-4d5c-88d5-9afc62c283d5",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-04-04T08:48:32.239111Z",
+     "iopub.status.busy": "2024-04-04T08:48:32.238769Z",
+     "iopub.status.idle": "2024-04-04T08:48:32.369358Z",
+     "shell.execute_reply": "2024-04-04T08:48:32.367989Z",
+     "shell.execute_reply.started": "2024-04-04T08:48:32.239071Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "# Fully connected classifier: flatten each 28x28 input, pass it through one\n",
+    "# 128-unit ReLU layer, then a 10-unit output layer with no activation.\n",
+    "model = tf.keras.models.Sequential()\n",
+    "model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))\n",
+    "model.add(tf.keras.layers.Dense(128, activation='relu'))\n",
+    "model.add(tf.keras.layers.Dense(10))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "dd7871ac-cacd-4866-bdda-67651f592262",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-04-04T08:48:32.371520Z",
+     "iopub.status.busy": "2024-04-04T08:48:32.371132Z",
+     "iopub.status.idle": "2024-04-04T08:48:32.391109Z",
+     "shell.execute_reply": "2024-04-04T08:48:32.390162Z",
+     "shell.execute_reply.started": "2024-04-04T08:48:32.371484Z"
+    }
+   },
    "outputs": [],
+   "source": [
+    "# Adam (learning rate 0.001) minimizing sparse categorical cross-entropy computed\n",
+    "# from logits; sparse categorical accuracy is the reported metric.\n",
+    "model.compile(\n",
+    "    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+    "    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],\n",
+    "    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "280e0d9d-d9e8-41d9-b9ad-666e84fc0bfa",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-04-04T08:48:32.392690Z",
+     "iopub.status.busy": "2024-04-04T08:48:32.392405Z",
+     "iopub.status.idle": "2024-04-04T08:49:02.402966Z",
+     "shell.execute_reply": "2024-04-04T08:49:02.402204Z",
+     "shell.execute_reply.started": "2024-04-04T08:48:32.392662Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/5\n",
+      "1875/1875 [==============================] - 7s 3ms/step - loss: 2.6159 - sparse_categorical_accuracy: 0.8597 - val_loss: 0.5951 - val_sparse_categorical_accuracy: 0.8762\n",
+      "Epoch 2/5\n",
+      "1875/1875 [==============================] - 6s 3ms/step - loss: 0.4041 - sparse_categorical_accuracy: 0.9071 - val_loss: 0.4567 - val_sparse_categorical_accuracy: 0.9024\n",
+      "Epoch 3/5\n",
+      "1875/1875 [==============================] - 6s 3ms/step - loss: 0.2977 - sparse_categorical_accuracy: 0.9247 - val_loss: 0.3300 - val_sparse_categorical_accuracy: 0.9237\n",
+      "Epoch 4/5\n",
+      "1875/1875 [==============================] - 5s 3ms/step - loss: 0.2646 - sparse_categorical_accuracy: 0.9344 - val_loss: 0.2909 - val_sparse_categorical_accuracy: 0.9368\n",
+      "Epoch 5/5\n",
+      "1875/1875 [==============================] - 5s 3ms/step - loss: 0.2418 - sparse_categorical_accuracy: 0.9391 - val_loss: 0.2886 - val_sparse_categorical_accuracy: 0.9330\n",
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 60000 entries, 0 to 59999\n",
+      "Data columns (total 2 columns):\n",
+      " #   Column  Non-Null Count  Dtype \n",
+      "---  ------  --------------  ----- \n",
+      " 0   image   60000 non-null  object\n",
+      " 1   label   60000 non-null  int64 \n",
+      "dtypes: int64(1), object(1)\n",
+      "memory usage: 937.6+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Train for 5 epochs; the test arrays are passed as validation data so their\n",
+    "# loss and accuracy are reported after every epoch.\n",
+    "model.fit(\n",
+    "    x=X_train,\n",
+    "    y=y_train,\n",
+    "    epochs=5,\n",
+    "    validation_data=(X_test, y_test),\n",
+    ")\n",
+    "\n",
+    "# Print the column/dtype summary of the training DataFrame.\n",
+    "dataset_train.info()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c7317f9a-14f4-4908-9895-8bc085900e28",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-04-04T08:49:35.659063Z",
+     "iopub.status.busy": "2024-04-04T08:49:35.658449Z",
+     "iopub.status.idle": "2024-04-04T08:49:36.588926Z",
+     "shell.execute_reply": "2024-04-04T08:49:36.588088Z",
+     "shell.execute_reply.started": "2024-04-04T08:49:35.659019Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "313/313 [==============================] - 1s 3ms/step - loss: 0.2886 - sparse_categorical_accuracy: 0.9330\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0.9330000281333923"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Score the trained model on the test arrays; the result unpacks into the loss\n",
+    "# followed by the metric configured at compile time.\n",
+    "loss, accuracy = model.evaluate(x=X_test, y=y_test)\n",
+    "accuracy"
+   ]
   }
  ],
  "metadata": {