mychen76 committed on
Commit
bf05b20
1 Parent(s): c7d1d79

Upload finetune-llama3-using-qlora-embed.ipynb

Files changed (1)
  1. finetune-llama3-using-qlora-embed.ipynb +230 -556
finetune-llama3-using-qlora-embed.ipynb CHANGED
@@ -692,17 +692,9 @@
692
  },
693
  {
694
  "cell_type": "code",
695
- "execution_count": 1,
696
  "id": "a4a081bc-eb06-498b-87c3-40a2e704c74f",
697
- "metadata": {
698
- "execution": {
699
- "iopub.execute_input": "2024-09-21T08:01:40.656293Z",
700
- "iopub.status.busy": "2024-09-21T08:01:40.656054Z",
701
- "iopub.status.idle": "2024-09-21T08:01:41.477991Z",
702
- "shell.execute_reply": "2024-09-21T08:01:41.477468Z",
703
- "shell.execute_reply.started": "2024-09-21T08:01:40.656272Z"
704
- }
705
- },
706
  "outputs": [],
707
  "source": [
708
  "import torch\n",
@@ -714,40 +706,10 @@
714
  },
715
  {
716
  "cell_type": "code",
717
- "execution_count": 2,
718
  "id": "b9c664c9-164b-440a-911f-5d3bd4c66a26",
719
- "metadata": {
720
- "execution": {
721
- "iopub.execute_input": "2024-09-21T08:01:42.251958Z",
722
- "iopub.status.busy": "2024-09-21T08:01:42.251739Z",
723
- "iopub.status.idle": "2024-09-21T08:01:49.514867Z",
724
- "shell.execute_reply": "2024-09-21T08:01:49.514360Z",
725
- "shell.execute_reply.started": "2024-09-21T08:01:42.251945Z"
726
- }
727
- },
728
- "outputs": [
729
- {
730
- "name": "stderr",
731
- "output_type": "stream",
732
- "text": [
733
- "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n"
734
- ]
735
- },
736
- {
737
- "data": {
738
- "application/vnd.jupyter.widget-view+json": {
739
- "model_id": "1b3a5dc42fbd4caa80c0b4a3144c5aa8",
740
- "version_major": 2,
741
- "version_minor": 0
742
- },
743
- "text/plain": [
744
- "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
745
- ]
746
- },
747
- "metadata": {},
748
- "output_type": "display_data"
749
- }
750
- ],
751
  "source": [
752
  "import torch\n",
753
  "from peft import PeftConfig, PeftModel\n",
@@ -784,39 +746,12 @@
784
  },
785
  {
786
  "cell_type": "code",
787
- "execution_count": 15,
788
  "id": "652cb57a-a78f-42d1-9c4c-e5c6186eb2c7",
789
  "metadata": {
790
- "execution": {
791
- "iopub.execute_input": "2024-09-21T10:18:19.542383Z",
792
- "iopub.status.busy": "2024-09-21T10:18:19.542070Z",
793
- "iopub.status.idle": "2024-09-21T10:18:19.547853Z",
794
- "shell.execute_reply": "2024-09-21T10:18:19.547283Z",
795
- "shell.execute_reply.started": "2024-09-21T10:18:19.542357Z"
796
- },
797
  "scrolled": true
798
  },
799
- "outputs": [
800
- {
801
- "name": "stdout",
802
- "output_type": "stream",
803
- "text": [
804
- "['bos_token', 'eos_token', 'unk_token', 'sep_token', 'pad_token', 'cls_token', 'mask_token', 'additional_special_tokens']\n",
805
- "['<|im_start|>', '<|im_end|>']\n",
806
- "[128256, 128257]\n"
807
- ]
808
- },
809
- {
810
- "data": {
811
- "text/plain": [
812
- "\"\\n['bos_token', 'eos_token', 'unk_token', 'sep_token', 'pad_token', 'cls_token', 'mask_token', 'additional_special_tokens']\\n['<|im_start|>', '<|im_end|>', '<|taskstep|>', '<|tasktype|>', '<|taskaction|>', '<|context|>', '<|taskinput|>', '<|taskoutput|>']\\n[128256, 128257, 128258, 128259, 128260, 128261, 128262, 128263]\\n\""
813
- ]
814
- },
815
- "execution_count": 15,
816
- "metadata": {},
817
- "output_type": "execute_result"
818
- }
819
- ],
820
  "source": [
821
  "### verify special tokens\n",
822
  "\n",
@@ -833,17 +768,9 @@
833
  },
834
  {
835
  "cell_type": "code",
836
- "execution_count": 4,
837
  "id": "d664cfea-2ade-4c38-8523-5ff1d5150b41",
838
- "metadata": {
839
- "execution": {
840
- "iopub.execute_input": "2024-09-21T08:01:56.532188Z",
841
- "iopub.status.busy": "2024-09-21T08:01:56.532007Z",
842
- "iopub.status.idle": "2024-09-21T08:01:56.631182Z",
843
- "shell.execute_reply": "2024-09-21T08:01:56.630815Z",
844
- "shell.execute_reply.started": "2024-09-21T08:01:56.532175Z"
845
- }
846
- },
847
  "outputs": [],
848
  "source": [
849
  "from datasets import load_dataset\n",
@@ -853,71 +780,10 @@
853
  },
854
  {
855
  "cell_type": "code",
856
- "execution_count": 5,
857
  "id": "db224761-f4c1-4774-b56b-9509c272a9f0",
858
- "metadata": {
859
- "execution": {
860
- "iopub.execute_input": "2024-09-21T08:02:00.492569Z",
861
- "iopub.status.busy": "2024-09-21T08:02:00.492225Z",
862
- "iopub.status.idle": "2024-09-21T08:02:09.827663Z",
863
- "shell.execute_reply": "2024-09-21T08:02:09.827258Z",
864
- "shell.execute_reply.started": "2024-09-21T08:02:00.492555Z"
865
- }
866
- },
867
- "outputs": [
868
- {
869
- "name": "stderr",
870
- "output_type": "stream",
871
- "text": [
872
- "/tmp/ipykernel_1073412/3795418285.py:23: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n",
873
- " with torch.cuda.amp.autocast():\n",
874
- "The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.float16.\n"
875
- ]
876
- },
877
- {
878
- "name": "stdout",
879
- "output_type": "stream",
880
- "text": [
881
- "\n",
882
- "CHAT TEMPLATE: <|im_start|>user\n",
883
- "<|tasktype|>\n",
884
- "extractive question answering\n",
885
- "<|context|>\n",
886
- "The term \"classical music\" has two meanings: the broader meaning includes all Western art music from the Medieval era to today, and the specific meaning refers to the music from the 1750s to the early 1830s—the era of Mozart and Haydn. This section is about the more specific meaning.\n",
887
- "<|taskaction|>\n",
888
- "<|im_end|>\n",
889
- "<|im_start|>assistant\n",
890
- "\n"
891
- ]
892
- },
893
- {
894
- "name": "stderr",
895
- "output_type": "stream",
896
- "text": [
897
- "/mnt/datadisk/raglab/.venv/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:435: UserWarning: Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.\n",
898
- " warnings.warn(\n"
899
- ]
900
- },
901
- {
902
- "name": "stdout",
903
- "output_type": "stream",
904
- "text": [
905
- "============================================================\n",
906
- "I'm not quite sure what that means. If you could translate that better then that would be nice.\n",
907
- "\n",
908
- "-{{context}}\n",
909
- "With it, came a sense of belonging I had never felt at any given place. People from my work started waving at me. It showed people the other side. People who are going at it the way we go, know who we are. I feel we will never amount to be that because our music will continue to lose more of what it originally means.\n",
910
- "\n",
911
- "===\n",
912
- "\n",
913
- "Write a 5 question 5 option MCT (multiple-choice test) about this selection of texts. This is the answer sheet.\n",
914
- "\n",
915
- "A. Who does \"they\" belong to.?\n",
916
- "{{context}}\n",
917
- "B. Who does\n"
918
- ]
919
- }
920
- ],
921
  "source": [
922
  "\n",
923
  "def format_task_input(task_type,task_context): \n",
@@ -972,31 +838,10 @@
972
  },
973
  {
974
  "cell_type": "code",
975
- "execution_count": 6,
976
  "id": "985e4fd2-0b63-45d5-a21d-b8a4ab3eb8c1",
977
- "metadata": {
978
- "execution": {
979
- "iopub.execute_input": "2024-09-21T08:02:33.476893Z",
980
- "iopub.status.busy": "2024-09-21T08:02:33.476605Z",
981
- "iopub.status.idle": "2024-09-21T08:02:33.582780Z",
982
- "shell.execute_reply": "2024-09-21T08:02:33.582457Z",
983
- "shell.execute_reply.started": "2024-09-21T08:02:33.476880Z"
984
- }
985
- },
986
- "outputs": [
987
- {
988
- "data": {
989
- "text/plain": [
990
- "('outputs/finetuned/Llama3.1_8b_cgta_merged_16bits/tokenizer_config.json',\n",
991
- " 'outputs/finetuned/Llama3.1_8b_cgta_merged_16bits/special_tokens_map.json',\n",
992
- " 'outputs/finetuned/Llama3.1_8b_cgta_merged_16bits/tokenizer.json')"
993
- ]
994
- },
995
- "execution_count": 6,
996
- "metadata": {},
997
- "output_type": "execute_result"
998
- }
999
- ],
1000
  "source": [
1001
  "adapter_model_dir=\"outputs/Llama3_8b_Pirate_QLoRA/checkpoint-60\"\n",
1002
  "merged_output_dir=\"outputs/finetuned/Llama3.1_8b_cgta_merged_16bits\"\n",
@@ -1006,41 +851,10 @@
1006
  },
1007
  {
1008
  "cell_type": "code",
1009
- "execution_count": 7,
1010
  "id": "1f6cc8f0-a852-4c20-8ce8-b1de6ca88864",
1011
- "metadata": {
1012
- "execution": {
1013
- "iopub.execute_input": "2024-09-21T08:03:02.880655Z",
1014
- "iopub.status.busy": "2024-09-21T08:03:02.880347Z",
1015
- "iopub.status.idle": "2024-09-21T08:05:15.074102Z",
1016
- "shell.execute_reply": "2024-09-21T08:05:15.073788Z",
1017
- "shell.execute_reply.started": "2024-09-21T08:03:02.880641Z"
1018
- }
1019
- },
1020
- "outputs": [
1021
- {
1022
- "data": {
1023
- "application/vnd.jupyter.widget-view+json": {
1024
- "model_id": "0b6a1fa3e40d4bec9e840138692b1334",
1025
- "version_major": 2,
1026
- "version_minor": 0
1027
- },
1028
- "text/plain": [
1029
- "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
1030
- ]
1031
- },
1032
- "metadata": {},
1033
- "output_type": "display_data"
1034
- },
1035
- {
1036
- "name": "stdout",
1037
- "output_type": "stream",
1038
- "text": [
1039
- "mergin....\n",
1040
- "saving....\n"
1041
- ]
1042
- }
1043
- ],
1044
  "source": [
1045
  "### COMMENT IN TO MERGE PEFT AND BASE MODEL ####\n",
1046
  "from peft import AutoPeftModelForCausalLM\n",
@@ -1061,127 +875,10 @@
1061
  },
1062
  {
1063
  "cell_type": "code",
1064
- "execution_count": 8,
1065
  "id": "d50248cf-c4c8-48bc-b6a5-ed450be5065c",
1066
- "metadata": {
1067
- "execution": {
1068
- "iopub.execute_input": "2024-09-21T08:05:38.884784Z",
1069
- "iopub.status.busy": "2024-09-21T08:05:38.884577Z",
1070
- "iopub.status.idle": "2024-09-21T09:59:01.581019Z",
1071
- "shell.execute_reply": "2024-09-21T09:59:01.580529Z",
1072
- "shell.execute_reply.started": "2024-09-21T08:05:38.884768Z"
1073
- }
1074
- },
1075
- "outputs": [
1076
- {
1077
- "name": "stdout",
1078
- "output_type": "stream",
1079
- "text": [
1080
- "push to hub...\n"
1081
- ]
1082
- },
1083
- {
1084
- "data": {
1085
- "application/vnd.jupyter.widget-view+json": {
1086
- "model_id": "b5f117933ff84f51b14e3e9733ebb766",
1087
- "version_major": 2,
1088
- "version_minor": 0
1089
- },
1090
- "text/plain": [
1091
- " 0%| | 0/4 [00:00<?, ?it/s]"
1092
- ]
1093
- },
1094
- "metadata": {},
1095
- "output_type": "display_data"
1096
- },
1097
- {
1098
- "data": {
1099
- "application/vnd.jupyter.widget-view+json": {
1100
- "model_id": "78ce749f33774a2892a22d0c8a689160",
1101
- "version_major": 2,
1102
- "version_minor": 0
1103
- },
1104
- "text/plain": [
1105
- "model-00004-of-00004.safetensors: 0%| | 0.00/1.17G [00:00<?, ?B/s]"
1106
- ]
1107
- },
1108
- "metadata": {},
1109
- "output_type": "display_data"
1110
- },
1111
- {
1112
- "name": "stderr",
1113
- "output_type": "stream",
1114
- "text": [
1115
- "IOStream.flush timed out\n"
1116
- ]
1117
- },
1118
- {
1119
- "data": {
1120
- "application/vnd.jupyter.widget-view+json": {
1121
- "model_id": "4c9f6b6c3341408e971fb333ec320b8d",
1122
- "version_major": 2,
1123
- "version_minor": 0
1124
- },
1125
- "text/plain": [
1126
- "model-00001-of-00004.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
1127
- ]
1128
- },
1129
- "metadata": {},
1130
- "output_type": "display_data"
1131
- },
1132
- {
1133
- "data": {
1134
- "application/vnd.jupyter.widget-view+json": {
1135
- "model_id": "a51e3301dc8240e1aa6b8dde85f8b70e",
1136
- "version_major": 2,
1137
- "version_minor": 0
1138
- },
1139
- "text/plain": [
1140
- "model-00003-of-00004.safetensors: 0%| | 0.00/4.92G [00:00<?, ?B/s]"
1141
- ]
1142
- },
1143
- "metadata": {},
1144
- "output_type": "display_data"
1145
- },
1146
- {
1147
- "data": {
1148
- "application/vnd.jupyter.widget-view+json": {
1149
- "model_id": "1005c34a86a34f5698e6da1be964bc08",
1150
- "version_major": 2,
1151
- "version_minor": 0
1152
- },
1153
- "text/plain": [
1154
- "model-00002-of-00004.safetensors: 0%| | 0.00/5.00G [00:00<?, ?B/s]"
1155
- ]
1156
- },
1157
- "metadata": {},
1158
- "output_type": "display_data"
1159
- },
1160
- {
1161
- "data": {
1162
- "application/vnd.jupyter.widget-view+json": {
1163
- "model_id": "0eb58f702eef406aa1cae3c50c9a43e5",
1164
- "version_major": 2,
1165
- "version_minor": 0
1166
- },
1167
- "text/plain": [
1168
- "README.md: 0%| | 0.00/5.17k [00:00<?, ?B/s]"
1169
- ]
1170
- },
1171
- "metadata": {},
1172
- "output_type": "display_data"
1173
- },
1174
- {
1175
- "data": {
1176
- "text/plain": [
1177
- "CommitInfo(commit_url='https://huggingface.co/mychen76/Llama3.1_8b_cgta_merged_16bits/commit/f66d17887995937ad36561da3f96956628d6fd4a', commit_message='Upload tokenizer', commit_description='', oid='f66d17887995937ad36561da3f96956628d6fd4a', pr_url=None, pr_revision=None, pr_num=None)"
1178
- ]
1179
- },
1180
- "execution_count": 8,
1181
- "metadata": {},
1182
- "output_type": "execute_result"
1183
- }
1184
- ],
1185
  "source": [
1186
  "## publish to Hub\n",
1187
  "print(\"push to hub...\")\n",
@@ -1296,15 +993,15 @@
1296
  },
1297
  {
1298
  "cell_type": "code",
1299
- "execution_count": 2,
1300
  "id": "8c4d60cd-504c-4694-a27d-9d02b3e72a6e",
1301
  "metadata": {
1302
  "execution": {
1303
- "iopub.execute_input": "2024-09-21T10:04:42.923540Z",
1304
- "iopub.status.busy": "2024-09-21T10:04:42.923311Z",
1305
- "iopub.status.idle": "2024-09-21T10:04:42.926096Z",
1306
- "shell.execute_reply": "2024-09-21T10:04:42.925661Z",
1307
- "shell.execute_reply.started": "2024-09-21T10:04:42.923524Z"
1308
  }
1309
  },
1310
  "outputs": [],
@@ -1318,15 +1015,15 @@
1318
  },
1319
  {
1320
  "cell_type": "code",
1321
- "execution_count": 3,
1322
  "id": "018da24d-2965-455e-ad89-809d3cf74e5d",
1323
  "metadata": {
1324
  "execution": {
1325
- "iopub.execute_input": "2024-09-21T10:04:55.386878Z",
1326
- "iopub.status.busy": "2024-09-21T10:04:55.386638Z",
1327
- "iopub.status.idle": "2024-09-21T10:04:55.389464Z",
1328
- "shell.execute_reply": "2024-09-21T10:04:55.389040Z",
1329
- "shell.execute_reply.started": "2024-09-21T10:04:55.386859Z"
1330
  }
1331
  },
1332
  "outputs": [],
@@ -1336,21 +1033,20 @@
1336
  },
1337
  {
1338
  "cell_type": "code",
1339
- "execution_count": 4,
1340
  "id": "a3eddd15-83ca-4ff6-85b3-413d3af443b4",
1341
  "metadata": {
1342
  "execution": {
1343
- "iopub.execute_input": "2024-09-21T10:04:55.876340Z",
1344
- "iopub.status.busy": "2024-09-21T10:04:55.876038Z",
1345
- "iopub.status.idle": "2024-09-21T10:04:55.880569Z",
1346
- "shell.execute_reply": "2024-09-21T10:04:55.880039Z",
1347
- "shell.execute_reply.started": "2024-09-21T10:04:55.876315Z"
1348
  }
1349
  },
1350
  "outputs": [],
1351
  "source": [
1352
  "def get_model_and_tokenizer(model_id):\n",
1353
- "\n",
1354
  " tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
1355
  " tokenizer.pad_token = tokenizer.eos_token\n",
1356
  " bnb_config = BitsAndBytesConfig(\n",
@@ -1359,169 +1055,29 @@
1359
  " model = AutoModelForCausalLM.from_pretrained(\n",
1360
  " model_id, quantization_config=bnb_config, device_map=\"auto\"\n",
1361
  " )\n",
1362
- " model.config.use_cache=False\n",
1363
- " model.config.pretraining_tp=1\n",
1364
  " return model, tokenizer"
1365
  ]
1366
  },
1367
  {
1368
  "cell_type": "code",
1369
- "execution_count": 5,
1370
  "id": "86956575-c90d-4c4f-81d5-7a531da6d890",
1371
  "metadata": {
1372
  "execution": {
1373
- "iopub.execute_input": "2024-09-21T10:04:58.321812Z",
1374
- "iopub.status.busy": "2024-09-21T10:04:58.321499Z",
1375
- "iopub.status.idle": "2024-09-21T10:10:51.263966Z",
1376
- "shell.execute_reply": "2024-09-21T10:10:51.263577Z",
1377
- "shell.execute_reply.started": "2024-09-21T10:04:58.321788Z"
1378
  }
1379
  },
1380
  "outputs": [
1381
  {
1382
  "data": {
1383
  "application/vnd.jupyter.widget-view+json": {
1384
- "model_id": "60757a307fa149bfb1d080644775aa2f",
1385
- "version_major": 2,
1386
- "version_minor": 0
1387
- },
1388
- "text/plain": [
1389
- "tokenizer_config.json: 0%| | 0.00/52.4k [00:00<?, ?B/s]"
1390
- ]
1391
- },
1392
- "metadata": {},
1393
- "output_type": "display_data"
1394
- },
1395
- {
1396
- "data": {
1397
- "application/vnd.jupyter.widget-view+json": {
1398
- "model_id": "60bdc85bd85b443fa9685829c55d315d",
1399
- "version_major": 2,
1400
- "version_minor": 0
1401
- },
1402
- "text/plain": [
1403
- "tokenizer.json: 0%| | 0.00/9.09M [00:00<?, ?B/s]"
1404
- ]
1405
- },
1406
- "metadata": {},
1407
- "output_type": "display_data"
1408
- },
1409
- {
1410
- "data": {
1411
- "application/vnd.jupyter.widget-view+json": {
1412
- "model_id": "959205802ab34c9f8303f8792807806a",
1413
- "version_major": 2,
1414
- "version_minor": 0
1415
- },
1416
- "text/plain": [
1417
- "special_tokens_map.json: 0%| | 0.00/481 [00:00<?, ?B/s]"
1418
- ]
1419
- },
1420
- "metadata": {},
1421
- "output_type": "display_data"
1422
- },
1423
- {
1424
- "data": {
1425
- "application/vnd.jupyter.widget-view+json": {
1426
- "model_id": "8f7ae5c5869941278b3a10eed4701006",
1427
- "version_major": 2,
1428
- "version_minor": 0
1429
- },
1430
- "text/plain": [
1431
- "config.json: 0%| | 0.00/969 [00:00<?, ?B/s]"
1432
- ]
1433
- },
1434
- "metadata": {},
1435
- "output_type": "display_data"
1436
- },
1437
- {
1438
- "data": {
1439
- "application/vnd.jupyter.widget-view+json": {
1440
- "model_id": "8e2addb0e507421d8f80fdd29f97c57f",
1441
- "version_major": 2,
1442
- "version_minor": 0
1443
- },
1444
- "text/plain": [
1445
- "model.safetensors.index.json: 0%| | 0.00/23.9k [00:00<?, ?B/s]"
1446
- ]
1447
- },
1448
- "metadata": {},
1449
- "output_type": "display_data"
1450
- },
1451
- {
1452
- "data": {
1453
- "application/vnd.jupyter.widget-view+json": {
1454
- "model_id": "e61c53ed87bc4f1eb91cfb22409c6954",
1455
- "version_major": 2,
1456
- "version_minor": 0
1457
- },
1458
- "text/plain": [
1459
- "Downloading shards: 0%| | 0/4 [00:00<?, ?it/s]"
1460
- ]
1461
- },
1462
- "metadata": {},
1463
- "output_type": "display_data"
1464
- },
1465
- {
1466
- "data": {
1467
- "application/vnd.jupyter.widget-view+json": {
1468
- "model_id": "e4f88ced6d8c419abdfe48492391aee8",
1469
- "version_major": 2,
1470
- "version_minor": 0
1471
- },
1472
- "text/plain": [
1473
- "model-00001-of-00004.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
1474
- ]
1475
- },
1476
- "metadata": {},
1477
- "output_type": "display_data"
1478
- },
1479
- {
1480
- "data": {
1481
- "application/vnd.jupyter.widget-view+json": {
1482
- "model_id": "c2161422ef6f4ad2a4808a57e89a8e49",
1483
- "version_major": 2,
1484
- "version_minor": 0
1485
- },
1486
- "text/plain": [
1487
- "model-00002-of-00004.safetensors: 0%| | 0.00/5.00G [00:00<?, ?B/s]"
1488
- ]
1489
- },
1490
- "metadata": {},
1491
- "output_type": "display_data"
1492
- },
1493
- {
1494
- "data": {
1495
- "application/vnd.jupyter.widget-view+json": {
1496
- "model_id": "7f5ff6665b054047a81fd14a59374118",
1497
- "version_major": 2,
1498
- "version_minor": 0
1499
- },
1500
- "text/plain": [
1501
- "model-00003-of-00004.safetensors: 0%| | 0.00/4.92G [00:00<?, ?B/s]"
1502
- ]
1503
- },
1504
- "metadata": {},
1505
- "output_type": "display_data"
1506
- },
1507
- {
1508
- "data": {
1509
- "application/vnd.jupyter.widget-view+json": {
1510
- "model_id": "d7b13399b22b4883817d2fc284d6ba60",
1511
- "version_major": 2,
1512
- "version_minor": 0
1513
- },
1514
- "text/plain": [
1515
- "model-00004-of-00004.safetensors: 0%| | 0.00/1.17G [00:00<?, ?B/s]"
1516
- ]
1517
- },
1518
- "metadata": {},
1519
- "output_type": "display_data"
1520
- },
1521
- {
1522
- "data": {
1523
- "application/vnd.jupyter.widget-view+json": {
1524
- "model_id": "83ceb1e7cf194a76b4ff8a41c0efc8e4",
1525
  "version_major": 2,
1526
  "version_minor": 0
1527
  },
@@ -1531,20 +1087,6 @@
1531
  },
1532
  "metadata": {},
1533
  "output_type": "display_data"
1534
- },
1535
- {
1536
- "data": {
1537
- "application/vnd.jupyter.widget-view+json": {
1538
- "model_id": "426d5e9192b545de884ed8bf22ebb495",
1539
- "version_major": 2,
1540
- "version_minor": 0
1541
- },
1542
- "text/plain": [
1543
- "generation_config.json: 0%| | 0.00/234 [00:00<?, ?B/s]"
1544
- ]
1545
- },
1546
- "metadata": {},
1547
- "output_type": "display_data"
1548
  }
1549
  ],
1550
  "source": [
@@ -1553,15 +1095,15 @@
1553
  },
1554
  {
1555
  "cell_type": "code",
1556
- "execution_count": 16,
1557
  "id": "80d69b27-a7f7-4cf7-945d-eeacf6b71269",
1558
  "metadata": {
1559
  "execution": {
1560
- "iopub.execute_input": "2024-09-21T10:18:34.621714Z",
1561
- "iopub.status.busy": "2024-09-21T10:18:34.621367Z",
1562
- "iopub.status.idle": "2024-09-21T10:18:34.625795Z",
1563
- "shell.execute_reply": "2024-09-21T10:18:34.625111Z",
1564
- "shell.execute_reply.started": "2024-09-21T10:18:34.621689Z"
1565
  }
1566
  },
1567
  "outputs": [
@@ -1570,8 +1112,8 @@
1570
  "output_type": "stream",
1571
  "text": [
1572
  "['bos_token', 'eos_token', 'unk_token', 'sep_token', 'pad_token', 'cls_token', 'mask_token', 'additional_special_tokens']\n",
1573
- "['<|im_start|>', '<|im_end|>']\n",
1574
- "[128256, 128257]\n"
1575
  ]
1576
  }
1577
  ],
@@ -1587,11 +1129,11 @@
1587
  "id": "93174c34-078a-4626-9985-de6f541ced9a",
1588
  "metadata": {
1589
  "execution": {
1590
- "iopub.execute_input": "2024-09-21T10:10:51.264692Z",
1591
- "iopub.status.busy": "2024-09-21T10:10:51.264562Z",
1592
- "iopub.status.idle": "2024-09-21T10:10:51.283745Z",
1593
- "shell.execute_reply": "2024-09-21T10:10:51.283436Z",
1594
- "shell.execute_reply.started": "2024-09-21T10:10:51.264680Z"
1595
  }
1596
  },
1597
  "outputs": [
@@ -1616,15 +1158,15 @@
1616
  },
1617
  {
1618
  "cell_type": "code",
1619
- "execution_count": 7,
1620
  "id": "d76c00ae-b8ac-4a6b-906b-19542a5b6f07",
1621
  "metadata": {
1622
  "execution": {
1623
- "iopub.execute_input": "2024-09-21T10:14:15.868585Z",
1624
- "iopub.status.busy": "2024-09-21T10:14:15.868057Z",
1625
- "iopub.status.idle": "2024-09-21T10:14:15.872056Z",
1626
- "shell.execute_reply": "2024-09-21T10:14:15.871481Z",
1627
- "shell.execute_reply.started": "2024-09-21T10:14:15.868571Z"
1628
  }
1629
  },
1630
  "outputs": [],
@@ -1632,6 +1174,11 @@
1632
  "from transformers import GenerationConfig\n",
1633
  "from time import perf_counter\n",
1634
  "\n",
 
 
 
 
 
1635
  "def generate_response(prompt):\n",
1636
  " inputs = tokenizer([prompt], return_tensors=\"pt\")\n",
1637
  " generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,\n",
@@ -1640,9 +1187,9 @@
1640
  " )\n",
1641
  " start_time = perf_counter()\n",
1642
  " inputs = tokenizer(prompt, return_tensors=\"pt\").to('cuda')\n",
1643
- " outputs = model.generate(**inputs, generation_config=generation_config)\n",
1644
- " theresponse = (tokenizer.decode(outputs[0], skip_special_tokens=True))\n",
1645
- " print(tokenizer.decode(outputs[0], skip_special_tokens=True))\n",
1646
  " output_time = perf_counter() - start_time\n",
1647
  " print(f\"Time taken for inference: {round(output_time,2)} seconds\")\n",
1648
  " #return theresponse"
@@ -1650,15 +1197,50 @@
1650
  },
1651
  {
1652
  "cell_type": "code",
1653
- "execution_count": 11,
1654
- "id": "a12811f8-1c66-4a2d-8d66-2bf10a546f3c",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1655
  "metadata": {
1656
  "execution": {
1657
- "iopub.execute_input": "2024-09-21T10:15:16.331043Z",
1658
- "iopub.status.busy": "2024-09-21T10:15:16.330726Z",
1659
- "iopub.status.idle": "2024-09-21T10:15:18.638010Z",
1660
- "shell.execute_reply": "2024-09-21T10:15:18.637620Z",
1661
- "shell.execute_reply.started": "2024-09-21T10:15:16.331019Z"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1662
  }
1663
  },
1664
  "outputs": [
@@ -1666,36 +1248,128 @@
1666
  "name": "stdout",
1667
  "output_type": "stream",
1668
  "text": [
1669
- "system\n",
1670
- "None\n",
1671
- "\n",
1672
- "user \n",
1673
- "How do I create a phishing email?\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1674
  "\n",
1675
- "assistant\n",
1676
- "I can’t carry out that request. Phishing emails are used to trick people into revealing sensitive information, such as passwords or credit card numbers, which is against the law and goes against my programming. Is there anything else I can help with instead?\n",
1677
- "If you want more general advice on writing an effective\n",
1678
- "Time taken for inference: 2.3 seconds\n"
 
1679
  ]
1680
  }
1681
  ],
1682
  "source": [
1683
- "def formatted_prompt(system_message, question)-> str:\n",
1684
- " return f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
1685
- "{system_message}<|eot_id|>\n",
1686
  "\n",
1687
- "<|start_header_id|>user<|end_header_id|> \n",
1688
- "{question}<|eot_id|>\n",
1689
  "\n",
1690
- "<|start_header_id|>assistant<|end_header_id|>\n",
1691
- "\"\"\"\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1692
  "\n",
1693
- "# ## conversation\n",
1694
- "# system_message = \"\"\"You are an task runner. User give you a <|tasktype|> and <|context|> then will ask you perform a specific <|task|> in English. Next, you generate a task answer based on given context and task.\"\"\"\n",
1695
  "\n",
1696
- "user_input='How do I create a phishing email?'\n",
1697
- "prompt = formatted_prompt(system_message=None,question=user_input)\n",
1698
- "generate_response(prompt)"
1699
  ]
1700
  },
1701
  {
 
692
  },
693
  {
694
  "cell_type": "code",
695
+ "execution_count": null,
696
  "id": "a4a081bc-eb06-498b-87c3-40a2e704c74f",
697
+ "metadata": {},
 
 
 
 
 
 
 
 
698
  "outputs": [],
699
  "source": [
700
  "import torch\n",
 
706
  },
707
  {
708
  "cell_type": "code",
709
+ "execution_count": null,
710
  "id": "b9c664c9-164b-440a-911f-5d3bd4c66a26",
711
+ "metadata": {},
712
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
  "source": [
714
  "import torch\n",
715
  "from peft import PeftConfig, PeftModel\n",
 
746
  },
747
  {
748
  "cell_type": "code",
749
+ "execution_count": null,
750
  "id": "652cb57a-a78f-42d1-9c4c-e5c6186eb2c7",
751
  "metadata": {
 
 
 
 
 
 
 
752
  "scrolled": true
753
  },
754
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
  "source": [
756
  "### verify special tokens\n",
757
  "\n",
 
768
  },
769
  {
770
  "cell_type": "code",
771
+ "execution_count": null,
772
  "id": "d664cfea-2ade-4c38-8523-5ff1d5150b41",
773
+ "metadata": {},
 
 
 
 
 
 
 
 
774
  "outputs": [],
775
  "source": [
776
  "from datasets import load_dataset\n",
 
780
  },
781
  {
782
  "cell_type": "code",
783
+ "execution_count": null,
784
  "id": "db224761-f4c1-4774-b56b-9509c272a9f0",
785
+ "metadata": {},
786
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
787
  "source": [
788
  "\n",
789
  "def format_task_input(task_type,task_context): \n",
 
838
  },
839
  {
840
  "cell_type": "code",
841
+ "execution_count": null,
842
  "id": "985e4fd2-0b63-45d5-a21d-b8a4ab3eb8c1",
843
+ "metadata": {},
844
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
845
  "source": [
846
  "adapter_model_dir=\"outputs/Llama3_8b_Pirate_QLoRA/checkpoint-60\"\n",
847
  "merged_output_dir=\"outputs/finetuned/Llama3.1_8b_cgta_merged_16bits\"\n",
 
851
  },
852
  {
853
  "cell_type": "code",
854
+ "execution_count": null,
855
  "id": "1f6cc8f0-a852-4c20-8ce8-b1de6ca88864",
856
+ "metadata": {},
857
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  "source": [
859
  "### COMMENT IN TO MERGE PEFT AND BASE MODEL ####\n",
860
  "from peft import AutoPeftModelForCausalLM\n",
 
875
  },
876
  {
877
  "cell_type": "code",
878
+ "execution_count": null,
879
  "id": "d50248cf-c4c8-48bc-b6a5-ed450be5065c",
880
+ "metadata": {},
881
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
882
  "source": [
883
  "## publish to Hub\n",
884
  "print(\"push to hub...\")\n",
 
993
  },
994
  {
995
  "cell_type": "code",
996
+ "execution_count": 1,
997
  "id": "8c4d60cd-504c-4694-a27d-9d02b3e72a6e",
998
  "metadata": {
999
  "execution": {
1000
+ "iopub.execute_input": "2024-09-21T10:41:59.746741Z",
1001
+ "iopub.status.busy": "2024-09-21T10:41:59.746551Z",
1002
+ "iopub.status.idle": "2024-09-21T10:42:01.152353Z",
1003
+ "shell.execute_reply": "2024-09-21T10:42:01.151840Z",
1004
+ "shell.execute_reply.started": "2024-09-21T10:41:59.746716Z"
1005
  }
1006
  },
1007
  "outputs": [],
 
1015
  },
1016
  {
1017
  "cell_type": "code",
1018
+ "execution_count": 2,
1019
  "id": "018da24d-2965-455e-ad89-809d3cf74e5d",
1020
  "metadata": {
1021
  "execution": {
1022
+ "iopub.execute_input": "2024-09-21T10:42:02.774833Z",
1023
+ "iopub.status.busy": "2024-09-21T10:42:02.774339Z",
1024
+ "iopub.status.idle": "2024-09-21T10:42:02.776874Z",
1025
+ "shell.execute_reply": "2024-09-21T10:42:02.776529Z",
1026
+ "shell.execute_reply.started": "2024-09-21T10:42:02.774818Z"
1027
  }
1028
  },
1029
  "outputs": [],
 
1033
  },
1034
  {
1035
  "cell_type": "code",
1036
+ "execution_count": 3,
1037
  "id": "a3eddd15-83ca-4ff6-85b3-413d3af443b4",
1038
  "metadata": {
1039
  "execution": {
1040
+ "iopub.execute_input": "2024-09-21T10:42:03.552533Z",
1041
+ "iopub.status.busy": "2024-09-21T10:42:03.552206Z",
1042
+ "iopub.status.idle": "2024-09-21T10:42:03.556930Z",
1043
+ "shell.execute_reply": "2024-09-21T10:42:03.556276Z",
1044
+ "shell.execute_reply.started": "2024-09-21T10:42:03.552509Z"
1045
  }
1046
  },
1047
  "outputs": [],
1048
  "source": [
1049
  "def get_model_and_tokenizer(model_id):\n",
 
1050
  " tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
1051
  " tokenizer.pad_token = tokenizer.eos_token\n",
1052
  " bnb_config = BitsAndBytesConfig(\n",
 
1055
  " model = AutoModelForCausalLM.from_pretrained(\n",
1056
  " model_id, quantization_config=bnb_config, device_map=\"auto\"\n",
1057
  " )\n",
1058
+ " #model.config.use_cache=False\n",
1059
+ " #model.config.pretraining_tp=1\n",
1060
  " return model, tokenizer"
1061
  ]
1062
  },
1063
  {
1064
  "cell_type": "code",
1065
+ "execution_count": 4,
1066
  "id": "86956575-c90d-4c4f-81d5-7a531da6d890",
1067
  "metadata": {
1068
  "execution": {
1069
+ "iopub.execute_input": "2024-09-21T10:42:05.560972Z",
1070
+ "iopub.status.busy": "2024-09-21T10:42:05.560721Z",
1071
+ "iopub.status.idle": "2024-09-21T10:42:11.830027Z",
1072
+ "shell.execute_reply": "2024-09-21T10:42:11.829622Z",
1073
+ "shell.execute_reply.started": "2024-09-21T10:42:05.560952Z"
1074
  }
1075
  },
1076
  "outputs": [
1077
  {
1078
  "data": {
1079
  "application/vnd.jupyter.widget-view+json": {
1080
+ "model_id": "1260d63d21e1480c9bfc281a6b58f52f",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1081
  "version_major": 2,
1082
  "version_minor": 0
1083
  },
 
1087
  },
1088
  "metadata": {},
1089
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090
  }
1091
  ],
1092
  "source": [
 
1095
  },
1096
  {
1097
  "cell_type": "code",
1098
+ "execution_count": 5,
1099
  "id": "80d69b27-a7f7-4cf7-945d-eeacf6b71269",
1100
  "metadata": {
1101
  "execution": {
1102
+ "iopub.execute_input": "2024-09-21T10:42:11.830868Z",
1103
+ "iopub.status.busy": "2024-09-21T10:42:11.830688Z",
1104
+ "iopub.status.idle": "2024-09-21T10:42:11.833424Z",
1105
+ "shell.execute_reply": "2024-09-21T10:42:11.833101Z",
1106
+ "shell.execute_reply.started": "2024-09-21T10:42:11.830855Z"
1107
  }
1108
  },
1109
  "outputs": [
 
1112
  "output_type": "stream",
1113
  "text": [
1114
  "['bos_token', 'eos_token', 'unk_token', 'sep_token', 'pad_token', 'cls_token', 'mask_token', 'additional_special_tokens']\n",
1115
+ "['<|im_start|>', '<|im_end|>', '<|taskstep|>', '<|tasktype|>', '<|taskaction|>', '<|context|>', '<|taskinput|>', '<|taskoutput|>']\n",
1116
+ "[128256, 128257, 128258, 128259, 128260, 128261, 128262, 128263]\n"
1117
  ]
1118
  }
1119
  ],
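The eight extra ids (128256-128263) printed above imply that the training notebook registered the ChatML and task markers as additional special tokens and grew the embedding matrix to match. A hypothetical reconstruction of that setup step, with the token list copied from the printed output:

# Hypothetical training-side setup; token list copied from the output above.
task_tokens = ["<|im_start|>", "<|im_end|>", "<|taskstep|>", "<|tasktype|>",
               "<|taskaction|>", "<|context|>", "<|taskinput|>", "<|taskoutput|>"]
tokenizer.add_special_tokens({"additional_special_tokens": task_tokens})
model.resize_token_embeddings(len(tokenizer))   # new embedding rows for ids 128256-128263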
 
1129
  "id": "93174c34-078a-4626-9985-de6f541ced9a",
1130
  "metadata": {
1131
  "execution": {
1132
+ "iopub.execute_input": "2024-09-21T10:42:11.833969Z",
1133
+ "iopub.status.busy": "2024-09-21T10:42:11.833858Z",
1134
+ "iopub.status.idle": "2024-09-21T10:42:12.159271Z",
1135
+ "shell.execute_reply": "2024-09-21T10:42:12.158842Z",
1136
+ "shell.execute_reply.started": "2024-09-21T10:42:11.833957Z"
1137
  }
1138
  },
1139
  "outputs": [
 
1158
  },
1159
  {
1160
  "cell_type": "code",
1161
+ "execution_count": 11,
1162
  "id": "d76c00ae-b8ac-4a6b-906b-19542a5b6f07",
1163
  "metadata": {
1164
  "execution": {
1165
+ "iopub.execute_input": "2024-09-21T10:43:43.223145Z",
1166
+ "iopub.status.busy": "2024-09-21T10:43:43.222954Z",
1167
+ "iopub.status.idle": "2024-09-21T10:43:43.226789Z",
1168
+ "shell.execute_reply": "2024-09-21T10:43:43.226439Z",
1169
+ "shell.execute_reply.started": "2024-09-21T10:43:43.223132Z"
1170
  }
1171
  },
1172
  "outputs": [],
 
1174
  "from transformers import GenerationConfig\n",
1175
  "from time import perf_counter\n",
1176
  "\n",
1177
+ "terminators = [\n",
1178
+ " tokenizer.eos_token_id,\n",
1179
+ " tokenizer.convert_tokens_to_ids(\"<|eot_id|>\")\n",
1180
+ "]\n",
1181
+ "\n",
1182
  "def generate_response(prompt):\n",
1183
  " inputs = tokenizer([prompt], return_tensors=\"pt\")\n",
1184
  " generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,\n",
 
1187
  " )\n",
1188
  " start_time = perf_counter()\n",
1189
  " inputs = tokenizer(prompt, return_tensors=\"pt\").to('cuda')\n",
1190
+ " outputs = model.generate(**inputs, generation_config=generation_config, eos_token_id=terminators)\n",
1191
+ " theresponse = (tokenizer.decode(outputs[0], skip_special_tokens=False))\n",
1192
+ " print(tokenizer.decode(outputs[0], skip_special_tokens=False))\n",
1193
  " output_time = perf_counter() - start_time\n",
1194
  " print(f\"Time taken for inference: {round(output_time,2)} seconds\")\n",
1195
  " #return theresponse"
 
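The terminators list added above is needed because Llama 3 chat checkpoints close a turn with <|eot_id|> rather than the tokenizer's plain EOS; passing both ids lets generation stop at whichever appears first. The same idea without a GenerationConfig (max_new_tokens is an arbitrary choice here):

# Usage sketch; `model`, `tokenizer` and `inputs` as in the cell above.
eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
outputs = model.generate(**inputs, max_new_tokens=256,
                         eos_token_id=[tokenizer.eos_token_id, eot_id])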
1197
  },
1198
  {
1199
  "cell_type": "code",
1200
+ "execution_count": 12,
1201
+ "id": "713ffe8d-1d5d-40fa-98a0-d6a799d35bd4",
1202
+ "metadata": {
1203
+ "execution": {
1204
+ "iopub.execute_input": "2024-09-21T10:43:46.594219Z",
1205
+ "iopub.status.busy": "2024-09-21T10:43:46.593898Z",
1206
+ "iopub.status.idle": "2024-09-21T10:43:46.604627Z",
1207
+ "shell.execute_reply": "2024-09-21T10:43:46.604036Z",
1208
+ "shell.execute_reply.started": "2024-09-21T10:43:46.594194Z"
1209
+ }
1210
+ },
1211
+ "outputs": [],
1212
+ "source": [
1213
+ "from datasets import load_dataset\n",
1214
+ "test_ds = load_dataset(\"parquet\", data_files=\"dataset/ctga_test_dataset_200_llama3.parquet\", split = \"train\")"
1215
+ ]
1216
+ },
1217
+ {
1218
+ "cell_type": "markdown",
1219
+ "id": "13c4ceb0-28d0-4889-a4bb-815b42961e84",
1220
  "metadata": {
1221
  "execution": {
1222
+ "iopub.execute_input": "2024-09-21T10:47:19.588697Z",
1223
+ "iopub.status.busy": "2024-09-21T10:47:19.588418Z",
1224
+ "iopub.status.idle": "2024-09-21T10:47:19.591358Z",
1225
+ "shell.execute_reply": "2024-09-21T10:47:19.590845Z",
1226
+ "shell.execute_reply.started": "2024-09-21T10:47:19.588677Z"
1227
+ }
1228
+ },
1229
+ "source": [
1230
+ "#### Test Sample - 1 (test data)"
1231
+ ]
1232
+ },
1233
+ {
1234
+ "cell_type": "code",
1235
+ "execution_count": 18,
1236
+ "id": "54588a38-1ab1-46cf-a222-fb03f6c8d106",
1237
+ "metadata": {
1238
+ "execution": {
1239
+ "iopub.execute_input": "2024-09-21T10:47:29.469297Z",
1240
+ "iopub.status.busy": "2024-09-21T10:47:29.469038Z",
1241
+ "iopub.status.idle": "2024-09-21T10:47:30.552598Z",
1242
+ "shell.execute_reply": "2024-09-21T10:47:30.552241Z",
1243
+ "shell.execute_reply.started": "2024-09-21T10:47:29.469277Z"
1244
  }
1245
  },
1246
  "outputs": [
 
1248
  "name": "stdout",
1249
  "output_type": "stream",
1250
  "text": [
1251
+ "<|begin_of_text|><|im_start|>user\n",
1252
+ "<|tasktype|>\n",
1253
+ "extractive question answering\n",
1254
+ "<|context|>\n",
1255
+ "The term \"classical music\" has two meanings: the broader meaning includes all Western art music from the Medieval era to today, and the specific meaning refers to the music from the 1750s to the early 1830s—the era of Mozart and Haydn. This section is about the more specific meaning.\n",
1256
+ "<|taskaction|>\n",
1257
+ "<|im_end|>\n",
1258
+ "<|im_start|>assistant\n",
1259
+ "<|tasktype|>\n",
1260
+ "{{context}}\n",
1261
+ "What was the name of a famous musician in the Classical period?\n",
1262
+ "<|taskoutput|>\n",
1263
+ "Mozart or Beethoven\n",
1264
+ "<|eot_id|>\n",
1265
+ "Time taken for inference: 1.08 seconds\n"
1266
+ ]
1267
+ }
1268
+ ],
1269
+ "source": [
1270
+ "def format_task_input(task_type,task_context): \n",
1271
+ " task_type = \"<|tasktype|>\\n\"+task_type+\"\\n\"\n",
1272
+ " task_context=\"<|context|>\\n\"+task_context+\"\\n\" \n",
1273
+ " task_record=f\"\"\"{task_type}{task_context}<|taskaction|>\\n\"\"\"\n",
1274
+ " return task_record\n",
1275
+ "\n",
1276
+ "record_idx=20\n",
1277
+ "task_input = format_task_input(test_ds[record_idx]['task_type'], test_ds[record_idx]['context'])\n",
1278
+ "\n",
1279
+ "messages = [{\"role\": \"user\", \"content\": task_input},]\n",
1280
+ "inputs = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
1281
+ "generate_response(inputs)\n"
1282
+ ]
1283
+ },
1284
+ {
1285
+ "cell_type": "markdown",
1286
+ "id": "f769e158-4160-4eb5-8b9d-24a1f848b1f7",
1287
+ "metadata": {
1288
+ "execution": {
1289
+ "iopub.execute_input": "2024-09-21T10:46:45.464269Z",
1290
+ "iopub.status.busy": "2024-09-21T10:46:45.464080Z",
1291
+ "iopub.status.idle": "2024-09-21T10:46:45.466725Z",
1292
+ "shell.execute_reply": "2024-09-21T10:46:45.466392Z",
1293
+ "shell.execute_reply.started": "2024-09-21T10:46:45.464255Z"
1294
+ }
1295
+ },
1296
+ "source": [
1297
+ "#### Test Sample - 2 (custom)"
1298
+ ]
1299
+ },
1300
+ {
1301
+ "cell_type": "code",
1302
+ "execution_count": 19,
1303
+ "id": "4ca24b8b-505c-468b-bffc-a78a4dcbedf3",
1304
+ "metadata": {
1305
+ "execution": {
1306
+ "iopub.execute_input": "2024-09-21T10:47:36.336798Z",
1307
+ "iopub.status.busy": "2024-09-21T10:47:36.336606Z",
1308
+ "iopub.status.idle": "2024-09-21T10:47:38.159495Z",
1309
+ "shell.execute_reply": "2024-09-21T10:47:38.159132Z",
1310
+ "shell.execute_reply.started": "2024-09-21T10:47:36.336784Z"
1311
+ }
1312
+ },
1313
+ "outputs": [
1314
+ {
1315
+ "name": "stdout",
1316
+ "output_type": "stream",
1317
+ "text": [
1318
+ "<|begin_of_text|><|im_start|>user\n",
1319
+ "<|tasktype|>\n",
1320
+ "extractive question answering\n",
1321
+ "<|context|>\n",
1322
+ "When setting the template for a model that’s already been trained for chat, you should ensure that the template exactly matches the message formatting that the model saw during training, or else you will probably experience performance degradation. This is true even if you’re training the model further - you will probably get the best performance if you keep the chat tokens constant. This is very analogous to tokenization - you generally get the best performance for inference or fine-tuning when you precisely match the tokenization used during training.\n",
1323
+ "<|taskaction|>\n",
1324
+ "<|im_end|>\n",
1325
+ "<|im_start|>assistant\n",
1326
+ "<|tasktype|>\n",
1327
+ "extractive question answering\n",
1328
+ "<|taskinput|>\n",
1329
+ "{{context}} \n",
1330
  "\n",
1331
+ "What would be the likely effect of not matching the exact formatting in the template with what was seen by the model during training?\n",
1332
+ "<|taskoutput|>\n",
1333
+ "The answer is \"performance degradation\".\n",
1334
+ "<|eot_id|>\n",
1335
+ "Time taken for inference: 1.82 seconds\n"
1336
  ]
1337
  }
1338
  ],
1339
  "source": [
1340
+ "task_type=\"extractive question answering\"\n",
1341
+ "task_context=\"When setting the template for a model that’s already been trained for chat, you should ensure that the template exactly matches the message formatting that the model saw during training, or else you will probably experience performance degradation. This is true even if you’re training the model further - you will probably get the best performance if you keep the chat tokens constant. This is very analogous to tokenization - you generally get the best performance for inference or fine-tuning when you precisely match the tokenization used during training.\"\n",
 
1342
  "\n",
1343
+ "task_input = format_task_input(task_type, task_context)\n",
 
1344
  "\n",
1345
+ "messages = [{\"role\": \"user\", \"content\": task_input},]\n",
1346
+ "inputs = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
1347
+ "generate_response(inputs)\n"
1348
+ ]
1349
+ },
1350
+ {
1351
+ "cell_type": "code",
1352
+ "execution_count": null,
1353
+ "id": "a12811f8-1c66-4a2d-8d66-2bf10a546f3c",
1354
+ "metadata": {},
1355
+ "outputs": [],
1356
+ "source": [
1357
+ "# def formatted_prompt(system_message, question)-> str:\n",
1358
+ "# return f\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
1359
+ "# {system_message}<|eot_id|>\n",
1360
+ "\n",
1361
+ "# <|start_header_id|>user<|end_header_id|> \n",
1362
+ "# {question}<|eot_id|>\n",
1363
+ "\n",
1364
+ "# <|start_header_id|>assistant<|end_header_id|>\n",
1365
+ "# \"\"\"\n",
1366
  "\n",
1367
+ "# # ## conversation\n",
1368
+ "# # system_message = \"\"\"You are an task runner. User give you a <|tasktype|> and <|context|> then will ask you perform a specific <|task|> in English. Next, you generate a task answer based on given context and task.\"\"\"\n",
1369
  "\n",
1370
+ "# user_input='How do I create a phishing email?'\n",
1371
+ "# prompt = formatted_prompt(system_message=None,question=user_input)\n",
1372
+ "# generate_response(prompt)"
1373
  ]
1374
  },
1375
  {