{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8a904d03-8c1a-4eb4-bd77-a26ec985d992",
   "metadata": {},
   "source": [
    "## Load the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f398bea9-d201-43ae-95e8-d4fad1de651c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Opening modified dataset uploaded to HF\n",
    "from datasets import load_dataset\n",
    "\n",
    "dataset_name = \"RaviNaik/oasst1-chatml\"\n",
    "dataset = load_dataset(dataset_name, split=\"train\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b74449fb-9f01-4c96-8a7e-6661180be1f3",
   "metadata": {},
   "source": [
    "## Load the Model, Tokenizer and configure bnb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ab5a26da-b9e0-424f-b482-034c10f049ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8f5d677d-082a-4d96-9fab-4d66981ecc27",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "88aa19589a2b46f9888c214db3867598",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model_name = \"microsoft/phi-2\"\n",
    "\n",
    "bnb_config = BitsAndBytesConfig(\n",
    "    load_in_4bit=True,\n",
    "    bnb_4bit_quant_type=\"nf4\",\n",
    "    bnb_4bit_compute_dtype=torch.float16,\n",
    ")\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_name,\n",
    "    quantization_config=bnb_config,\n",
    "    trust_remote_code=True,\n",
    "    device_map=\"cuda:0\"\n",
    ")\n",
    "model.config.use_cache = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0d649bc1-69d0-4683-b3e7-b999f4a52ce7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, device_map=\"cuda:0\")\n",
    "tokenizer.pad_token = tokenizer.eos_token"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2304e9e1-fe0a-48c9-8eaa-605af8d93ea1",
   "metadata": {},
   "source": [
    "## Display Model Layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0d572c85-fc5b-407d-bdd9-667c3cdb74bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PhiForCausalLM(\n",
       "  (transformer): PhiModel(\n",
       "    (embd): Embedding(\n",
       "      (wte): Embedding(51200, 2560)\n",
       "      (drop): Dropout(p=0.0, inplace=False)\n",
       "    )\n",
       "    (h): ModuleList(\n",
       "      (0-31): 32 x ParallelBlock(\n",
       "        (ln): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)\n",
       "        (resid_dropout): Dropout(p=0.1, inplace=False)\n",
       "        (mixer): MHA(\n",
       "          (rotary_emb): RotaryEmbedding()\n",
       "          (Wqkv): Linear4bit(in_features=2560, out_features=7680, bias=True)\n",
       "          (out_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)\n",
       "          (inner_attn): SelfAttention(\n",
       "            (drop): Dropout(p=0.0, inplace=False)\n",
       "          )\n",
       "          (inner_cross_attn): CrossAttention(\n",
       "            (drop): Dropout(p=0.0, inplace=False)\n",
       "          )\n",
       "        )\n",
       "        (mlp): MLP(\n",
       "          (fc1): Linear4bit(in_features=2560, out_features=10240, bias=True)\n",
       "          (fc2): Linear4bit(in_features=10240, out_features=2560, bias=True)\n",
       "          (act): NewGELUActivation()\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "  )\n",
       "  (lm_head): CausalLMHead(\n",
       "    (ln): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)\n",
       "    (linear): Linear(in_features=2560, out_features=51200, bias=True)\n",
       "  )\n",
       "  (loss): CausalLMLoss(\n",
       "    (loss_fct): CrossEntropyLoss()\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d628159-3f0e-43eb-bf7b-12129f54e0df",
   "metadata": {},
   "source": [
    "## Configure LoRA for finetuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "45d0e0e8-a8d8-4a05-9c7d-4f6217b57310",
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import LoraConfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2cb2780d-609d-4fba-902d-a2526863b02c",
   "metadata": {},
   "outputs": [],
   "source": [
    "lora_alpha = 16\n",
    "lora_dropout = 0.1\n",
    "lora_r = 64\n",
    "\n",
    "peft_config = LoraConfig(\n",
    "    lora_alpha=lora_alpha,\n",
    "    lora_dropout=lora_dropout,\n",
    "    r=lora_r,\n",
    "    bias=\"none\",\n",
    "    task_type=\"CAUSAL_LM\",\n",
    "    target_modules=[\n",
    "        \"Wqkv\",\n",
    "        \"out_proj\",\n",
    "        \"fc1\",\n",
    "        \"fc2\",\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f1ed1b63-2263-4574-9497-180acd38ec14",
   "metadata": {},
   "source": [
    "## Configure Training Params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "f5074cda-91b9-4077-9d9f-b74d34778767",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import TrainingArguments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a793135a-a797-42b1-935d-fb969772a91f",
   "metadata": {},
   "outputs": [],
   "source": [
    "output_dir = \"./results\"\n",
    "per_device_train_batch_size = 4\n",
    "gradient_accumulation_steps = 4\n",
    "optim = \"paged_adamw_32bit\"\n",
    "save_steps = 100\n",
    "logging_steps = 10\n",
    "learning_rate = 2e-4\n",
    "max_grad_norm = 0.3\n",
    "max_steps = 500\n",
    "warmup_ratio = 0.03\n",
    "lr_scheduler_type = \"constant\"\n",
    "\n",
    "training_arguments = TrainingArguments(\n",
    "    output_dir=output_dir,\n",
    "    per_device_train_batch_size=per_device_train_batch_size,\n",
    "    gradient_accumulation_steps=gradient_accumulation_steps,\n",
    "    optim=optim,\n",
    "    save_steps=save_steps,\n",
    "    logging_steps=logging_steps,\n",
    "    learning_rate=learning_rate,\n",
    "    fp16=True,\n",
    "    max_grad_norm=max_grad_norm,\n",
    "    max_steps=max_steps,\n",
    "    warmup_ratio=warmup_ratio,\n",
    "    group_by_length=True,\n",
    "    lr_scheduler_type=lr_scheduler_type,\n",
    "    gradient_checkpointing=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "606d810a-8eaa-42df-9804-9f6bd421dee6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from trl import SFTTrainer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d554e892-5966-4cb3-9ef4-ede9a14a50e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "max_seq_length = 256\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model=model,\n",
    "    train_dataset=dataset,\n",
    "    peft_config=peft_config,\n",
    "    dataset_text_field=\"text\",\n",
    "    max_seq_length=max_seq_length,\n",
    "    tokenizer=tokenizer,\n",
    "    args=training_arguments,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "129c1914-447d-4e95-9346-fe9eb892eb12",
   "metadata": {},
   "outputs": [],
   "source": [
    "for name, module in trainer.model.named_modules():\n",
    "    if \"norm\" in name:\n",
    "        module = module.to(torch.float32).to(\"cuda:0\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a9f421a1-073d-4042-a092-650d7f6d27ec",
   "metadata": {},
   "source": [
    "## Begin Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ca7336ee-fc7d-479e-a384-24737ab74007",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
      "/home/ravi.naik/miniconda3/envs/torchenv/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
      "  warnings.warn('Was asked to gather along dimension 0, but all '\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='500' max='500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [500/500 1:11:36, Epoch 1/2]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>1.720800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>1.548300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>30</td>\n",
       "      <td>1.550700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>40</td>\n",
       "      <td>1.543500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>50</td>\n",
       "      <td>1.506600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>60</td>\n",
       "      <td>1.501400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>70</td>\n",
       "      <td>1.559600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>80</td>\n",
       "      <td>1.552900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>90</td>\n",
       "      <td>1.506300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>100</td>\n",
       "      <td>1.471700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>110</td>\n",
       "      <td>1.484100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>120</td>\n",
       "      <td>1.498100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>130</td>\n",
       "      <td>1.517600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>140</td>\n",
       "      <td>1.481100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>150</td>\n",
       "      <td>1.485100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>160</td>\n",
       "      <td>1.516800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>170</td>\n",
       "      <td>1.505000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>180</td>\n",
       "      <td>1.484600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>190</td>\n",
       "      <td>1.495200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>200</td>\n",
       "      <td>1.447100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>210</td>\n",
       "      <td>1.520700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>220</td>\n",
       "      <td>1.444500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>230</td>\n",
       "      <td>1.464800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>240</td>\n",
       "      <td>1.480200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>250</td>\n",
       "      <td>1.444100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>260</td>\n",
       "      <td>1.543900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>270</td>\n",
       "      <td>1.512700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>280</td>\n",
       "      <td>1.441300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>290</td>\n",
       "      <td>1.502200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>300</td>\n",
       "      <td>1.476900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>310</td>\n",
       "      <td>1.478200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>320</td>\n",
       "      <td>1.481000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>330</td>\n",
       "      <td>1.433600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>340</td>\n",
       "      <td>1.404000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>350</td>\n",
       "      <td>1.401000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>360</td>\n",
       "      <td>1.424400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>370</td>\n",
       "      <td>1.429100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>380</td>\n",
       "      <td>1.388700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>390</td>\n",
       "      <td>1.402600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>400</td>\n",
       "      <td>1.417900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>410</td>\n",
       "      <td>1.358200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>420</td>\n",
       "      <td>1.460700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>430</td>\n",
       "      <td>1.417800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>440</td>\n",
       "      <td>1.447300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>450</td>\n",
       "      <td>1.429200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>460</td>\n",
       "      <td>1.388100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>470</td>\n",
       "      <td>1.433200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>480</td>\n",
       "      <td>1.431600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>490</td>\n",
       "      <td>1.491200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>500</td>\n",
       "      <td>1.406600</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ravi.naik/miniconda3/envs/torchenv/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
      "  warnings.warn('Was asked to gather along dimension 0, but all '\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a595199a17b14a5b99d4c06f5eda00af",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/866 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using a model of type phi to instantiate a model of type phi-msft. This is not supported for all configurations of models and can yield errors.\n",
      "/home/ravi.naik/miniconda3/envs/torchenv/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
      "  warnings.warn('Was asked to gather along dimension 0, but all '\n",
      "You are using a model of type phi to instantiate a model of type phi-msft. This is not supported for all configurations of models and can yield errors.\n",
      "/home/ravi.naik/miniconda3/envs/torchenv/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
      "  warnings.warn('Was asked to gather along dimension 0, but all '\n",
      "You are using a model of type phi to instantiate a model of type phi-msft. This is not supported for all configurations of models and can yield errors.\n",
      "/home/ravi.naik/miniconda3/envs/torchenv/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n",
      "  warnings.warn('Was asked to gather along dimension 0, but all '\n",
      "You are using a model of type phi to instantiate a model of type phi-msft. This is not supported for all configurations of models and can yield errors.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "TrainOutput(global_step=500, training_loss=1.4746462078094482, metrics={'train_runtime': 4307.6684, 'train_samples_per_second': 3.714, 'train_steps_per_second': 0.116, 'total_flos': 6.667526640623616e+16, 'train_loss': 1.4746462078094482, 'epoch': 1.62})"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94cbfe9f-3bf4-4f0c-bc7e-8544809d1309",
   "metadata": {},
   "source": [
    "## Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "31262e62-3ca5-41e1-9f69-739d975a0cbb",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import (\n",
    "    AutoModelForCausalLM,\n",
    "    AutoTokenizer,\n",
    "    BitsAndBytesConfig,\n",
    "    HfArgumentParser,\n",
    "    TrainingArguments,\n",
    "    pipeline,\n",
    "    logging,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "85ab9662-7a84-43a8-ad83-f4a6aed8f515",
   "metadata": {},
   "outputs": [],
   "source": [
    "chat_template = \"\"\"<|im_start|>system\n",
    "You are a helpful assistant who always respond to user queries<|im_end|>\n",
    "<im_start>user\n",
    "{prompt}<|im_end|>\n",
    "<|im_start|>assistant\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "e3e26a34-acfc-45f7-8a4a-96157e25d1f5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ravi.naik/miniconda3/envs/torchenv/lib/python3.10/site-packages/transformers/generation/utils.py:1518: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are a helpful assistant who always respond to user queries<|im_end|>\n",
      "<im_start>user\n",
      "What is a large language model?<|im_end|>\n",
      "<|im_start|>assistant\n",
      "A large language model (LLM) is a type of artificial intelligence model that is designed to understand and generate human language. LLMs are typically trained on large amounts of text data and are capable of performing a wide range of language-related tasks, such as language translation, text summarization, and text generation.\n",
      "\n",
      "LLMs are different from traditional language models, such as recurrent neural networks (RNNs) or transformers, in that they are designed to handle large amounts of text data and are able to learn from this data in a more efficient and effective way. This makes them well-suited for tasks that require a deep understanding of language, such as natural language processing (N\n"
     ]
    }
   ],
   "source": [
    "prompt = \"What is a large language model?\"\n",
    "pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
    "result = pipe(chat_template.format(prompt=prompt))\n",
    "print(result[0]['generated_text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "7af8f545-e76c-4ce3-a02d-864404dd357c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are a helpful assistant who always respond to user queries<|im_end|>\n",
      "<im_start>user\n",
      "Write a Python program to print first 50 prime numbers<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Here's a Python program that prints the first 50 prime numbers:\n",
      "\n",
      "```python\n",
      "def is_prime(n):\n",
      "    if n < 2:\n",
      "        return False\n",
      "    for i in range(2, int(n**0.5) + 1):\n",
      "        if n % i == 0:\n",
      "            return False\n",
      "    return True\n",
      "\n",
      "count = 0\n",
      "num = 2\n",
      "while count < 50:\n",
      "    if is_prime(num):\n",
      "        print(num)\n",
      "        count += 1\n",
      "    num += 1\n",
      "```\n",
      "\n",
      "This program uses a helper function `is_prime` to check if a number is prime\n"
     ]
    }
   ],
   "source": [
    "prompt = \"Write a Python program to print first 50 prime numbers\"\n",
    "pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
    "result = pipe(chat_template.format(prompt=prompt))\n",
    "print(result[0]['generated_text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "398c483f-d4d6-4532-aecf-e22c2fc001f0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are a helpful assistant who always respond to user queries<|im_end|>\n",
      "<im_start>user\n",
      "Can you write a short introduction about the relevance of the term monopsony in economics?<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Sure, I'd be happy to help!\n",
      "\n",
      "Monopsony is a term used in economics to describe a market structure in which there is only one buyer for a particular good or service. This means that the buyer has significant bargaining power and can set the price at which they will purchase the good or service.\n",
      "\n",
      "In a monopsony, the buyer has the ability to influence the price of the good or service, which can have significant implications for the market. For example, if the buyer is a large corporation, they may be able to drive down the price of the good or service, which can have negative effects on the\n"
     ]
    }
   ],
   "source": [
    "prompt = \"Can you write a short introduction about the relevance of the term monopsony in economics?\"\n",
    "pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
    "result = pipe(chat_template.format(prompt=prompt))\n",
    "print(result[0]['generated_text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "23d75586-7177-4c82-9f93-170ee982e319",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using a model of type phi to instantiate a model of type phi-msft. This is not supported for all configurations of models and can yield errors.\n"
     ]
    }
   ],
   "source": [
    "trainer.save_model(\"checkpoints\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "7a362585-9064-4961-9160-0e560be70731",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using a model of type phi to instantiate a model of type phi-msft. This is not supported for all configurations of models and can yield errors.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "417e9c7948d7488aa6fc1b8c613d1d75",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0c2a5148f24d40439fb06025b7ae1d9e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "events.out.tfevents.1704991189.si-aiwork2.2021276.0:   0%|          | 0.00/13.1k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ea9d618995d740b1a0042aa9a3d8eeb5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5ac3bafc71614c6e830a9ac6b6b1000e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "training_args.bin:   0%|          | 0.00/4.66k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/RaviNaik/Phi2-Osst/commit/987f0bdcb557c8cb356eb79452181f2944c21f8b', commit_message='End of training', commit_description='', oid='987f0bdcb557c8cb356eb79452181f2944c21f8b', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainer.hub_model_id = \"RaviNaik/Phi2-OSST\"\n",
    "trainer.push_to_hub()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a5308db-e616-44df-8b1d-387c1ca69282",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}