{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library first, then third-party packages\n",
    "import os\n",
    "import random as rn\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Environment settings. An empty CUDA_VISIBLE_DEVICES hides every GPU from libraries that\n",
    "# initialise CUDA after this point.\n",
    "os.environ.update({'PYTHONHASHSEED': '0', 'CUDA_VISIBLE_DEVICES': ''})\n",
    "\n",
    "# Seed NumPy's and Python's global random number generators\n",
    "np.random.seed(37)\n",
    "rn.seed(1254)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load data and create train, validation and test splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sentence</th>\n",
       "      <th>Label</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S.No.</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Introduction to Quantum Mechanics</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>In this chapter, we explore the foundational p...</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>The Rise and Fall of Civilizations</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Historical records reveal the complex trajecto...</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Part III: Advanced Mathematical Concepts</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                Sentence  Label\n",
       "S.No.                                                          \n",
       "1                      Introduction to Quantum Mechanics    1.0\n",
       "2      In this chapter, we explore the foundational p...    0.0\n",
       "3                     The Rise and Fall of Civilizations    1.0\n",
       "4      Historical records reveal the complex trajecto...    0.0\n",
       "5               Part III: Advanced Mathematical Concepts    1.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>198.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.555051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.313770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.650000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Label\n",
       "count  198.000000\n",
       "mean     0.555051\n",
       "std      0.313770\n",
       "min      0.000000\n",
       "25%      0.300000\n",
       "50%      0.650000\n",
       "75%      0.800000\n",
       "max      1.000000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(198, 2)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Quick look at the raw data: container type, first rows, summary statistics and shape\n",
    "path_to_data = \"./data/Sentences_200.csv\"\n",
    "new_data_5_cat = pd.read_csv(path_to_data, index_col='S.No.')\n",
    "\n",
    "print(type(new_data_5_cat))\n",
    "for eda_view in (new_data_5_cat.head(), new_data_5_cat.describe(), new_data_5_cat.shape):\n",
    "    display(eda_view)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\Geetansh\\Desktop\\New_folder\\venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['Sentence', 'Label', 'S.No.'],\n",
       "    num_rows: 160\n",
       "})"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['Sentence', 'Label', 'S.No.'],\n",
       "    num_rows: 20\n",
       "})"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['Sentence', 'Label', 'S.No.'],\n",
       "    num_rows: 18\n",
       "})"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Make train / validation (cv) / test splits\n",
    "from datasets import Dataset\n",
    "\n",
    "# Pass the seed explicitly so the partition is identical on every run, regardless of which\n",
    "# cells (or random draws) happened to run before this one\n",
    "SPLIT_SEED = 37\n",
    "TRAIN_SIZE = 160  # rows used for fine-tuning\n",
    "TEST_SIZE = 20    # rows held out for the final test; the remainder becomes the validation set\n",
    "\n",
    "ds = Dataset.from_pandas(new_data_5_cat)\n",
    "\n",
    "# First carve out the training rows, then divide what is left into validation and test\n",
    "ds_train_temp_dict = ds.train_test_split(train_size=TRAIN_SIZE, seed=SPLIT_SEED)\n",
    "ds_train = ds_train_temp_dict['train']\n",
    "ds_test_cv_dict = ds_train_temp_dict['test'].train_test_split(test_size=TEST_SIZE, seed=SPLIT_SEED)\n",
    "ds_cv = ds_test_cv_dict['train']\n",
    "ds_test = ds_test_cv_dict['test']\n",
    "\n",
    "# Every row must land in exactly one split\n",
    "assert len(ds_train) + len(ds_cv) + len(ds_test) == len(ds)\n",
    "\n",
    "display(ds_train)\n",
    "display(ds_test)\n",
    "display(ds_cv)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fine-tune the LLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\Geetansh\\Desktop\\New_folder\\venv\\Lib\\site-packages\\transformers\\convert_slow_tokenizer.py:561: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['▁My', '▁name', '▁is', '▁Geeta', 'n', 'sh', '▁Bhardwaj', '.']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the tokenizer that belongs to the pretrained checkpoint and try it on a sample sentence\n",
    "from transformers import AutoTokenizer\n",
    "\n",
    "model_nm = 'microsoft/deberta-v3-small'\n",
    "tokz = AutoTokenizer.from_pretrained(model_nm)\n",
    "\n",
    "sample_sentence = 'My name is Geetansh Bhardwaj.'\n",
    "tokz.tokenize(sample_sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Map: 100%|██████████| 160/160 [00:00<00:00, 4079.69 examples/s]\n"
     ]
    }
   ],
   "source": [
    "# Tokenize the 'Sentence' column\n",
    "def tokenize_string(row):\n",
    "    '''\n",
    "    Run the tokenizer `tokz` on the 'Sentence' field.\n",
    "\n",
    "    row: a mapping with a 'Sentence' entry; either one example (a string) or, when called through\n",
    "         `Dataset.map(..., batched=True)`, a batch of examples (a list of strings)\n",
    "    returns: the tokenizer output, whose fields `Dataset.map` adds as new columns\n",
    "    '''\n",
    "    return tokz(row['Sentence'])\n",
    "\n",
    "def tokenize_sentence_col(ds):\n",
    "    '''\n",
    "    Tokenize the 'Sentence' column of a dataset so it can be used for fine-tuning.\n",
    "\n",
    "    ds: a dataset with a 'Sentence' column\n",
    "    returns: a new dataset with the original columns plus the tokenizer's output columns\n",
    "             (e.g. 'input_ids' and 'attention_mask')\n",
    "    '''\n",
    "\n",
    "    # `batch_size` is only honoured together with `batched=True`, so map in batches explicitly;\n",
    "    # the tokenizer then receives a list of sentences per call instead of one sentence at a time\n",
    "    tokenized_ds = ds.map(tokenize_string, batched=True)\n",
    "    return tokenized_ds\n",
    "\n",
    "tokenized_ds_train = tokenize_sentence_col(ds_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Map: 100%|██████████| 18/18 [00:00<00:00, 2243.01 examples/s]\n"
     ]
    }
   ],
   "source": [
    "# The Trainer looks for the target under the column name \"labels\"; ours is named \"Label\", so rename it.\n",
    "# Each split is rebuilt from its raw dataset in a single expression, so this cell can be re-run safely\n",
    "# (renaming an already-renamed dataset would fail because 'Label' no longer exists).\n",
    "label_rename = {'Label' : 'labels'}\n",
    "\n",
    "tokenized_ds_train = tokenize_sentence_col(ds_train).rename_columns(label_rename)\n",
    "tokenized_ds_cv = tokenize_sentence_col(ds_cv).rename_columns(label_rename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "# Get the model: a pretrained checkpoint plus a fresh single-output head (num_labels=1)\n",
    "from transformers import AutoModelForSequenceClassification, set_seed\n",
    "\n",
    "# The classification head is not part of the checkpoint and is initialised randomly, so fix the\n",
    "# seeds right before loading; otherwise every run starts from different head weights.\n",
    "# set_seed covers Python's `random`, NumPy and PyTorch; 42 matches the default seed of TrainingArguments.\n",
    "set_seed(42)\n",
    "my_model = AutoModelForSequenceClassification.from_pretrained(model_nm, num_labels=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From c:\\Users\\Geetansh\\Desktop\\New_folder\\venv\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\Geetansh\\Desktop\\New_folder\\venv\\Lib\\site-packages\\transformers\\training_args.py:1559: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
      "  warnings.warn(\n",
      "C:\\Users\\Geetansh\\AppData\\Local\\Temp\\ipykernel_4252\\1403743469.py:8: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
      "  trainer = Trainer(my_model, args, train_dataset=tokenized_ds_train, eval_dataset=tokenized_ds_cv,\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from transformers import TrainingArguments, Trainer\n",
    "\n",
    "# Hyperparameters\n",
    "bs = 5\n",
    "epochs = 4\n",
    "lr = 8e-5\n",
    "\n",
    "# fp16 mixed precision needs a CUDA device; enable it only when one is visible so the notebook\n",
    "# also runs on CPU-only machines (or when CUDA_VISIBLE_DEVICES hides the GPU)\n",
    "use_fp16 = torch.cuda.is_available()\n",
    "\n",
    "# `eval_strategy` and `processing_class` are the current names of the deprecated\n",
    "# `evaluation_strategy` and `tokenizer` arguments\n",
    "args = TrainingArguments('outputs', learning_rate=lr, warmup_ratio=0.1, lr_scheduler_type='cosine', fp16=use_fp16,\n",
    "    eval_strategy=\"epoch\", per_device_train_batch_size=bs, per_device_eval_batch_size=bs*2,\n",
    "    num_train_epochs=epochs, weight_decay=0.01, report_to='none')\n",
    "trainer = Trainer(my_model, args, train_dataset=tokenized_ds_train, eval_dataset=tokenized_ds_cv,\n",
    "                  processing_class=tokz)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                \n",
      " 25%|██▌       | 32/128 [00:10<00:26,  3.56it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.09050914645195007, 'eval_runtime': 0.3554, 'eval_samples_per_second': 50.653, 'eval_steps_per_second': 5.628, 'epoch': 1.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                \n",
      " 50%|█████     | 64/128 [00:19<00:17,  3.68it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.04030601680278778, 'eval_runtime': 0.3239, 'eval_samples_per_second': 55.567, 'eval_steps_per_second': 6.174, 'epoch': 2.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                \n",
      " 76%|███████▌  | 97/128 [00:28<00:10,  2.98it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.022483834996819496, 'eval_runtime': 0.3246, 'eval_samples_per_second': 55.448, 'eval_steps_per_second': 6.161, 'epoch': 3.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                 \n",
      "100%|██████████| 128/128 [00:41<00:00,  3.07it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.0200485959649086, 'eval_runtime': 0.3606, 'eval_samples_per_second': 49.921, 'eval_steps_per_second': 5.547, 'epoch': 4.0}\n",
      "{'train_runtime': 41.7528, 'train_samples_per_second': 15.328, 'train_steps_per_second': 3.066, 'train_loss': 0.11997667700052261, 'epoch': 4.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "TrainOutput(global_step=128, training_loss=0.11997667700052261, metrics={'train_runtime': 41.7528, 'train_samples_per_second': 15.328, 'train_steps_per_second': 3.066, 'total_flos': 1818871829700.0, 'train_loss': 0.11997667700052261, 'epoch': 4.0})"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fine-tune the pretrained model; keep the returned summary and show it as the cell output\n",
    "train_result = trainer.train()\n",
    "train_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Map: 100%|██████████| 20/20 [00:00<00:00, 162.84 examples/s]\n",
      "100%|██████████| 2/2 [00:00<00:00, 13.26it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([0.86230469, 0.28979492, 0.91162109, 0.86816406, 0.87988281,\n",
       "       0.21826172, 0.91064453, 0.89013672, 0.41748047, 0.8984375 ,\n",
       "       0.89355469, 0.14257812, 0.89160156, 0.35131836, 0.34375   ,\n",
       "       0.23815918, 0.87841797, 0.20471191, 0.10784912, 0.02485657])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tokenize the held-out test split and collect the model's predictions for it\n",
    "tokenized_ds_test = tokenize_sentence_col(ds_test).rename_columns({'Label' : 'labels'})\n",
    "\n",
    "test_output = trainer.predict(tokenized_ds_test)\n",
    "preds = test_output.predictions.astype(float)\n",
    "preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MAE: 0.09301467895507813\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.85</td>\n",
       "      <td>0.862305</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.40</td>\n",
       "      <td>0.289795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.80</td>\n",
       "      <td>0.911621</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.85</td>\n",
       "      <td>0.868164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.70</td>\n",
       "      <td>0.879883</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.30</td>\n",
       "      <td>0.218262</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.75</td>\n",
       "      <td>0.910645</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.85</td>\n",
       "      <td>0.890137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.70</td>\n",
       "      <td>0.417480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.90</td>\n",
       "      <td>0.898438</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.70</td>\n",
       "      <td>0.893555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0.20</td>\n",
       "      <td>0.142578</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0.90</td>\n",
       "      <td>0.891602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0.20</td>\n",
       "      <td>0.351318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0.40</td>\n",
       "      <td>0.343750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.20</td>\n",
       "      <td>0.238159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0.75</td>\n",
       "      <td>0.878418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0.30</td>\n",
       "      <td>0.204712</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0.00</td>\n",
       "      <td>0.107849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0.00</td>\n",
       "      <td>0.024857</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       a         b\n",
       "0   0.85  0.862305\n",
       "1   0.40  0.289795\n",
       "2   0.80  0.911621\n",
       "3   0.85  0.868164\n",
       "4   0.70  0.879883\n",
       "5   0.30  0.218262\n",
       "6   0.75  0.910645\n",
       "7   0.85  0.890137\n",
       "8   0.70  0.417480\n",
       "9   0.90  0.898438\n",
       "10  0.70  0.893555\n",
       "11  0.20  0.142578\n",
       "12  0.90  0.891602\n",
       "13  0.20  0.351318\n",
       "14  0.40  0.343750\n",
       "15  0.20  0.238159\n",
       "16  0.75  0.878418\n",
       "17  0.30  0.204712\n",
       "18  0.00  0.107849\n",
       "19  0.00  0.024857"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using MAE to calculate loss\n",
    "def get_mae(preds, real):\n",
    "    '''\n",
    "    Mean absolute error between predictions and targets.\n",
    "\n",
    "    preds, real: array-likes holding the same number of values; each is flattened first, so a\n",
    "                 column vector of shape (n, 1) and a flat vector of shape (n,) are compared\n",
    "                 element by element instead of being broadcast into an (n, n) matrix\n",
    "    returns: the mean of |preds - real| (a NumPy float)\n",
    "    raises: ValueError if the two inputs hold a different number of values\n",
    "    '''\n",
    "\n",
    "    preds = np.asarray(preds, dtype=float).ravel()\n",
    "    real = np.asarray(real, dtype=float).ravel()\n",
    "    if preds.size != real.size:\n",
    "        raise ValueError(f'preds has {preds.size} values but real has {real.size}')\n",
    "\n",
    "    mae = np.mean(np.abs(preds - real))\n",
    "    return mae\n",
    "\n",
    "real = np.array(tokenized_ds_test['labels'])\n",
    "\n",
    "print(f\"MAE: {get_mae(preds, real)}\")\n",
    "\n",
    "# Print predictions on test side-by-side (a: true label, b: model prediction)\n",
    "# reshape(-1) flattens whatever the test-set size is instead of assuming exactly 20 rows\n",
    "m = pd.DataFrame({'a':real.reshape(-1), 'b':preds.reshape(-1)})\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MAE of my model on the held-out test set: ~0.09 (0.093 in the run recorded above)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check if your GPU is available"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Report whether PyTorch can see a CUDA-capable GPU in this kernel\n",
    "# NOTE(review): the first cell sets CUDA_VISIBLE_DEVICES='' - with that in effect this is expected\n",
    "# to be False; the saved True output may come from a different session - confirm\n",
    "import torch\n",
    "torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Try exporting the model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### How to pass input to the model for inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SequenceClassifierOutput(loss={'logits': tensor([[0.6899]], device='cuda:0')}, logits=tensor([[0.6899]], device='cuda:0'), hidden_states=None, attentions=None)\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# Pick the device once: GPU when CUDA is available, CPU otherwise\n",
    "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
    "\n",
    "# Put the model on that device and switch it to evaluation mode\n",
    "my_model.to(device)\n",
    "my_model.eval()\n",
    "\n",
    "# Tokenize the input as PyTorch tensors and move every tensor to the model's device\n",
    "sentence = \"Hey, it's Geetansh\"\n",
    "encoded = tokz(sentence, return_tensors='pt')\n",
    "model_inputs = {}\n",
    "for key, val in encoded.items():\n",
    "    model_inputs[key] = val.to(device)\n",
    "\n",
    "# Forward pass without gradient tracking\n",
    "with torch.no_grad():\n",
    "    predictions = my_model(**model_inputs)\n",
    "\n",
    "# Print predictions\n",
    "print(predictions)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Method 1: `save_pretrained` on the tokenizer and the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SequenceClassifierOutput(loss=None, logits=tensor([[0.3520]], device='cuda:0'), hidden_states=None, attentions=None)\n"
     ]
    }
   ],
   "source": [
    "# Save the model and tokeniser to disk, each with its own `save_pretrained`\n",
    "save_dir = \"./saved_model\"\n",
    "\n",
    "# Export only when nothing has been saved yet, so a fresh checkout can run this cell end to end;\n",
    "# an existing export is left untouched (delete the directory to force a new one)\n",
    "if not os.path.isdir(save_dir):\n",
    "    tokz.save_pretrained(save_directory=save_dir)\n",
    "    my_model.save_pretrained(save_directory=save_dir)\n",
    "\n",
    "# Use GPU if available, otherwise fall back to CPU\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "# Load the saved model and tokeniser from the disk\n",
    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
    "loaded_tokeniser = AutoTokenizer.from_pretrained(save_dir)\n",
    "loaded_model = AutoModelForSequenceClassification.from_pretrained(save_dir)\n",
    "\n",
    "loaded_model.to(device)\n",
    "\n",
    "# Run a dummy sentence through the reloaded pair to confirm the export is usable\n",
    "dummy_input = loaded_tokeniser(\"This is a test sentence.\", return_tensors='pt')\n",
    "dummy_input = {key: val.to(device) for key, val in dummy_input.items()}\n",
    "\n",
    "with torch.no_grad():\n",
    "    output = loaded_model(**dummy_input)\n",
    "print(output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Method 2: `Trainer.save_model`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SequenceClassifierOutput(loss=None, logits=tensor([[0.3520]], device='cuda:0'), hidden_states=None, attentions=None)\n"
     ]
    }
   ],
   "source": [
    "# Save the model and tokeniser to disk through the Trainer\n",
    "save_dir = \"./saved_model2\"\n",
    "\n",
    "# Export only when nothing has been saved yet, so a fresh checkout can run this cell end to end;\n",
    "# an existing export is left untouched (delete the directory to force a new one)\n",
    "if not os.path.isdir(save_dir):\n",
    "    trainer.save_model(save_dir)\n",
    "\n",
    "# Use GPU if available, otherwise fall back to CPU\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "# Load the saved model and tokeniser from the disk\n",
    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
    "loaded_tokeniser = AutoTokenizer.from_pretrained(save_dir)\n",
    "loaded_model = AutoModelForSequenceClassification.from_pretrained(save_dir)\n",
    "\n",
    "loaded_model.to(device)\n",
    "\n",
    "# Run the same dummy sentence as in Method 1 through the reloaded pair\n",
    "dummy_input = loaded_tokeniser(\"This is a test sentence.\", return_tensors='pt')\n",
    "dummy_input = {key: val.to(device) for key, val in dummy_input.items()}\n",
    "\n",
    "with torch.no_grad():\n",
    "    output = loaded_model(**dummy_input)\n",
    "print(output)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}