Upload AutoTrain_LLM-meta-llama(Llama-2-7b-chat-hf)-sys_template.ipynb

Browse files

Files changed (1) hide show

AutoTrain_LLM-meta-llama(Llama-2-7b-chat-hf)-sys_template.ipynb +1123 -0

AutoTrain_LLM-meta-llama(Llama-2-7b-chat-hf)-sys_template.ipynb ADDED Viewed

	@@ -0,0 +1,1123 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "JvMRbVLEJlZT",
+    "outputId": "306df70b-0c75-4781-a75b-d10957a028a3"
+   },
+   "outputs": [],
+   "source": [
+    "# #@title 🤗 AutoTrain LLM\n",
+    "# #@markdown In order to use this colab\n",
+    "# #@markdown - upload train.csv to a folder named `data/`\n",
+    "# #@markdown - train.csv must contain a `text` column\n",
+    "# #@markdown - choose a project name if you wish\n",
+    "# #@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
+    "# #@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n",
+    "# #@markdown - update hyperparameters if you wish\n",
+    "# #@markdown - click `Runtime > Run all` or run each cell individually\n",
+    "\n",
+    "import os\n",
+    "# !pip install -U autotrain-advanced > install_logs.txt\n",
+    "# !autotrain setup > setup_logs.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install transformers --upgrade\n",
+    "# !pip install torch --upgrade\n",
+    "# !pip install tokenizers --upgrade"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "id": "A2-_lkBS1WKA"
+   },
+   "outputs": [],
+   "source": [
+    "# Configuration cell: collects the Colab form values below and exports them as\n",
+    "# environment variables so the `!autotrain llm ...` shell command can read them.\n",
+    "#@markdown ---\n",
+    "#@markdown #### Project Config\n",
+    "#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n",
+    "project_name = 'my_autotrain_llm_sys_temp_meta_llama_chat' # @param {type:\"string\"}\n",
+    "model_name =    \"meta-llama/Llama-2-7b-chat-hf\"         # 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n",
+    "\n",
+    "#@markdown ---\n",
+    "#@markdown #### Push to Hub?\n",
+    "#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account\n",
+    "#@markdown If you don't use these, the model will be saved in Google Colab and you are required to download it manually.\n",
+    "#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n",
+    "#@markdown You can find your token here: https://huggingface.co/settings/tokens\n",
+    "push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
+    "# SECURITY: never commit a real access token in a notebook. Leave this field empty in\n",
+    "# saved copies and provide the token via the HF_TOKEN environment variable instead.\n",
+    "# Any token that has ever been committed must be revoked and regenerated.\n",
+    "hf_token = \"\" #@param {type:\"string\"}\n",
+    "repo_id = \"hemantk089/llm_fine_tuning\" #@param {type:\"string\"}\n",
+    "\n",
+    "# Fall back to a token already present in the environment when the form field is empty.\n",
+    "hf_token = hf_token or os.environ.get(\"HF_TOKEN\", \"\")\n",
+    "if push_to_hub and not hf_token:\n",
+    "    raise ValueError(\"push_to_hub=True requires a Hugging Face write token (form field or HF_TOKEN env var)\")\n",
+    "\n",
+    "#@markdown ---\n",
+    "#@markdown #### Hyperparameters\n",
+    "learning_rate = 2e-4 # @param {type:\"number\"}\n",
+    "num_epochs = 200 #@param {type:\"number\"}\n",
+    "batch_size = 1 # @param {type:\"slider\", min:1, max:32, step:1}\n",
+    "block_size = 1024 # @param {type:\"number\"}\n",
+    "trainer = \"sft\" # @param [\"default\", \"sft\"] {type:\"raw\"}\n",
+    "warmup_ratio = 0.1 # @param {type:\"number\"}\n",
+    "weight_decay = 0.01 # @param {type:\"number\"}\n",
+    "gradient_accumulation = 4 # @param {type:\"number\"}\n",
+    "use_fp16 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
+    "use_peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
+    "use_int4 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
+    "lora_r = 16 #@param {type:\"number\"}\n",
+    "lora_alpha = 32 #@param {type:\"number\"}\n",
+    "lora_dropout = 0.05 #@param {type:\"number\"}\n",
+    "\n",
+    "# Environment values must be strings, hence the str() conversions.\n",
+    "os.environ[\"PROJECT_NAME\"] = project_name\n",
+    "os.environ[\"MODEL_NAME\"] = model_name\n",
+    "os.environ[\"PUSH_TO_HUB\"] = str(push_to_hub)\n",
+    "os.environ[\"HF_TOKEN\"] = hf_token\n",
+    "os.environ[\"REPO_ID\"] = repo_id\n",
+    "os.environ[\"LEARNING_RATE\"] = str(learning_rate)\n",
+    "os.environ[\"NUM_EPOCHS\"] = str(num_epochs)\n",
+    "os.environ[\"BATCH_SIZE\"] = str(batch_size)\n",
+    "os.environ[\"BLOCK_SIZE\"] = str(block_size)\n",
+    "# Export the trainer choice too; without it the `trainer` setting above never leaves\n",
+    "# this cell and the CLI silently uses its own default.\n",
+    "os.environ[\"TRAINER\"] = trainer\n",
+    "os.environ[\"WARMUP_RATIO\"] = str(warmup_ratio)\n",
+    "os.environ[\"WEIGHT_DECAY\"] = str(weight_decay)\n",
+    "os.environ[\"GRADIENT_ACCUMULATION\"] = str(gradient_accumulation)\n",
+    "os.environ[\"USE_FP16\"] = str(use_fp16)\n",
+    "os.environ[\"USE_PEFT\"] = str(use_peft)\n",
+    "os.environ[\"USE_INT4\"] = str(use_int4)\n",
+    "os.environ[\"LORA_R\"] = str(lora_r)\n",
+    "os.environ[\"LORA_ALPHA\"] = str(lora_alpha)\n",
+    "os.environ[\"LORA_DROPOUT\"] = str(lora_dropout)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "g3cd_ED_yXXt",
+    "outputId": "d753c017-cf19-4822-b8ea-c9e6b70fc2d1"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-10-04 14:41:59,153] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "> \u001b[1mINFO    Running LLM\u001b[0m\n",
+      "> \u001b[1mINFO    Params: Namespace(add_eos_token=False, auto_find_batch_size=False, backend='default', block_size=1024, data_path='data/', deploy=False, evaluation_strategy='epoch', fp16=True, func=<function run_llm_command_factory at 0x1468523be1f0>, gradient_accumulation_steps=4, inference=False, learning_rate=0.0002, logging_steps=-1, lora_alpha=32, lora_dropout=0.05, lora_r=16, max_grad_norm=1.0, merge_adapter=False, model='meta-llama/Llama-2-7b-chat-hf', model_max_length=1024, num_train_epochs=200, optimizer='adamw_torch', project_name='my_autotrain_llm_sys_temp_meta_llama_chat', push_to_hub=False, repo_id=None, save_strategy='epoch', save_total_limit=1, scheduler='linear', seed=42, target_modules=None, text_column='text', token=None, train=True, train_batch_size=1, train_split='train', trainer='default', use_flash_attention_2=False, use_int4=True, use_int8=False, use_peft=True, username=None, valid_split=None, version=False, warmup_ratio=0.1, weight_decay=0.01)\u001b[0m\n",
+      "> \u001b[1mINFO    loading dataset from csv\u001b[0m\n",
+      "Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`,  it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.\n",
+      "Using pad_token, but it is not set yet.\n",
+      "Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00,  1.78s/it]\n",
+      "You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32000. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc\n",
+      "Running tokenizer on train dataset: 100%|█| 90/90 [00:00<00:00, 8654.20 examples\n",
+      "Grouping texts in chunks of 1024 (num_proc=4): 100%|█| 90/90 [00:00<00:00, 729.7\n",
+      "> \u001b[1mINFO    creating trainer\u001b[0m\n",
+      "{'loss': 2.5454, 'learning_rate': 1e-05, 'epoch': 1.0}                          \n",
+      "{'loss': 2.5454, 'learning_rate': 2e-05, 'epoch': 2.0}                          \n",
+      "{'loss': 2.5344, 'learning_rate': 3e-05, 'epoch': 3.0}                          \n",
+      "{'loss': 2.505, 'learning_rate': 4e-05, 'epoch': 4.0}                           \n",
+      "{'loss': 2.4569, 'learning_rate': 5e-05, 'epoch': 5.0}                          \n",
+      "{'loss': 2.3922, 'learning_rate': 6e-05, 'epoch': 6.0}                          \n",
+      "{'loss': 2.3117, 'learning_rate': 7e-05, 'epoch': 7.0}                          \n",
+      "{'loss': 2.2224, 'learning_rate': 8e-05, 'epoch': 8.0}                          \n",
+      "{'loss': 2.1331, 'learning_rate': 9e-05, 'epoch': 9.0}                          \n",
+      "{'loss': 2.0432, 'learning_rate': 0.0001, 'epoch': 10.0}                        \n",
+      "{'loss': 1.9529, 'learning_rate': 0.00011000000000000002, 'epoch': 11.0}        \n",
+      "{'loss': 1.8601, 'learning_rate': 0.00012, 'epoch': 12.0}                       \n",
+      "{'loss': 1.7627, 'learning_rate': 0.00013000000000000002, 'epoch': 13.0}        \n",
+      "{'loss': 1.6574, 'learning_rate': 0.00014, 'epoch': 14.0}                       \n",
+      "{'loss': 1.5425, 'learning_rate': 0.00015000000000000001, 'epoch': 15.0}        \n",
+      "{'loss': 1.4224, 'learning_rate': 0.00016, 'epoch': 16.0}                       \n",
+      "{'loss': 1.3218, 'learning_rate': 0.00017, 'epoch': 17.0}                       \n",
+      "{'loss': 1.2318, 'learning_rate': 0.00018, 'epoch': 18.0}                       \n",
+      "{'loss': 1.1835, 'learning_rate': 0.00019, 'epoch': 19.0}                       \n",
+      "{'loss': 1.2394, 'learning_rate': 0.0002, 'epoch': 20.0}                        \n",
+      "{'loss': 1.2134, 'learning_rate': 0.0001988888888888889, 'epoch': 21.0}         \n",
+      "{'loss': 1.1547, 'learning_rate': 0.00019777777777777778, 'epoch': 22.0}        \n",
+      "{'loss': 1.1043, 'learning_rate': 0.00019666666666666666, 'epoch': 23.0}        \n",
+      "{'loss': 1.0702, 'learning_rate': 0.00019555555555555556, 'epoch': 24.0}        \n",
+      "{'loss': 1.0691, 'learning_rate': 0.00019444444444444446, 'epoch': 25.0}        \n",
+      "{'loss': 1.043, 'learning_rate': 0.00019333333333333333, 'epoch': 26.0}         \n",
+      "{'loss': 1.0063, 'learning_rate': 0.00019222222222222224, 'epoch': 27.0}        \n",
+      "{'loss': 0.9849, 'learning_rate': 0.00019111111111111114, 'epoch': 28.0}        \n",
+      "{'loss': 0.9743, 'learning_rate': 0.00019, 'epoch': 29.0}                       \n",
+      "{'loss': 0.9561, 'learning_rate': 0.00018888888888888888, 'epoch': 30.0}        \n",
+      "{'loss': 0.933, 'learning_rate': 0.00018777777777777779, 'epoch': 31.0}         \n",
+      "{'loss': 0.9131, 'learning_rate': 0.0001866666666666667, 'epoch': 32.0}         \n",
+      "{'loss': 0.9, 'learning_rate': 0.00018555555555555556, 'epoch': 33.0}           \n",
+      "{'loss': 0.8814, 'learning_rate': 0.00018444444444444446, 'epoch': 34.0}        \n",
+      "{'loss': 0.8579, 'learning_rate': 0.00018333333333333334, 'epoch': 35.0}        \n",
+      "{'loss': 0.836, 'learning_rate': 0.00018222222222222224, 'epoch': 36.0}         \n",
+      "{'loss': 0.8163, 'learning_rate': 0.0001811111111111111, 'epoch': 37.0}         \n",
+      "{'loss': 0.7901, 'learning_rate': 0.00018, 'epoch': 38.0}                       \n",
+      "{'loss': 0.7646, 'learning_rate': 0.0001788888888888889, 'epoch': 39.0}         \n",
+      "{'loss': 0.743, 'learning_rate': 0.00017777777777777779, 'epoch': 40.0}         \n",
+      "{'loss': 0.7167, 'learning_rate': 0.00017666666666666666, 'epoch': 41.0}        \n",
+      "{'loss': 0.6906, 'learning_rate': 0.00017555555555555556, 'epoch': 42.0}        \n",
+      "{'loss': 0.6648, 'learning_rate': 0.00017444444444444446, 'epoch': 43.0}        \n",
+      "{'loss': 0.6364, 'learning_rate': 0.00017333333333333334, 'epoch': 44.0}        \n",
+      "{'loss': 0.6111, 'learning_rate': 0.00017222222222222224, 'epoch': 45.0}        \n",
+      "{'loss': 0.5821, 'learning_rate': 0.0001711111111111111, 'epoch': 46.0}         \n",
+      "{'loss': 0.5554, 'learning_rate': 0.00017, 'epoch': 47.0}                       \n",
+      "{'loss': 0.5251, 'learning_rate': 0.00016888888888888889, 'epoch': 48.0}        \n",
+      "{'loss': 0.4958, 'learning_rate': 0.0001677777777777778, 'epoch': 49.0}         \n",
+      "{'loss': 0.4603, 'learning_rate': 0.0001666666666666667, 'epoch': 50.0}         \n",
+      "{'loss': 0.4281, 'learning_rate': 0.00016555555555555556, 'epoch': 51.0}        \n",
+      "{'loss': 0.3929, 'learning_rate': 0.00016444444444444444, 'epoch': 52.0}        \n",
+      "{'loss': 0.3468, 'learning_rate': 0.00016333333333333334, 'epoch': 53.0}        \n",
+      "{'loss': 0.3018, 'learning_rate': 0.00016222222222222224, 'epoch': 54.0}        \n",
+      "{'loss': 0.2756, 'learning_rate': 0.00016222222222222224, 'epoch': 55.0}        \n",
+      "{'loss': 0.2626, 'learning_rate': 0.0001611111111111111, 'epoch': 56.0}         \n",
+      "{'loss': 0.2279, 'learning_rate': 0.00016, 'epoch': 57.0}                       \n",
+      "{'loss': 0.2086, 'learning_rate': 0.0001588888888888889, 'epoch': 58.0}         \n",
+      "{'loss': 0.1972, 'learning_rate': 0.0001577777777777778, 'epoch': 59.0}         \n",
+      "{'loss': 0.179, 'learning_rate': 0.00015666666666666666, 'epoch': 60.0}         \n",
+      "{'loss': 0.156, 'learning_rate': 0.00015555555555555556, 'epoch': 61.0}         \n",
+      "{'loss': 0.1361, 'learning_rate': 0.00015444444444444446, 'epoch': 62.0}        \n",
+      "{'loss': 0.1177, 'learning_rate': 0.00015333333333333334, 'epoch': 63.0}        \n",
+      "{'loss': 0.1009, 'learning_rate': 0.0001522222222222222, 'epoch': 64.0}         \n",
+      "{'loss': 0.0834, 'learning_rate': 0.0001511111111111111, 'epoch': 65.0}         \n",
+      "{'loss': 0.0682, 'learning_rate': 0.00015000000000000001, 'epoch': 66.0}        \n",
+      "{'loss': 0.057, 'learning_rate': 0.0001488888888888889, 'epoch': 67.0}          \n",
+      "{'loss': 0.0469, 'learning_rate': 0.0001477777777777778, 'epoch': 68.0}         \n",
+      "{'loss': 0.0382, 'learning_rate': 0.00014666666666666666, 'epoch': 69.0}        \n",
+      "{'loss': 0.0327, 'learning_rate': 0.00014555555555555556, 'epoch': 70.0}        \n",
+      "{'loss': 0.0272, 'learning_rate': 0.00014444444444444444, 'epoch': 71.0}        \n",
+      "{'loss': 0.0222, 'learning_rate': 0.00014333333333333334, 'epoch': 72.0}        \n",
+      "{'loss': 0.0191, 'learning_rate': 0.00014222222222222224, 'epoch': 73.0}        \n",
+      "{'loss': 0.0165, 'learning_rate': 0.00014111111111111111, 'epoch': 74.0}        \n",
+      "{'loss': 0.0145, 'learning_rate': 0.00014, 'epoch': 75.0}                       \n",
+      "{'loss': 0.0133, 'learning_rate': 0.0001388888888888889, 'epoch': 76.0}         \n",
+      "{'loss': 0.0122, 'learning_rate': 0.0001377777777777778, 'epoch': 77.0}         \n",
+      "{'loss': 0.011, 'learning_rate': 0.00013666666666666666, 'epoch': 78.0}         \n",
+      "{'loss': 0.0104, 'learning_rate': 0.00013555555555555556, 'epoch': 79.0}        \n",
+      "{'loss': 0.0099, 'learning_rate': 0.00013444444444444447, 'epoch': 80.0}        \n",
+      "{'loss': 0.0094, 'learning_rate': 0.00013333333333333334, 'epoch': 81.0}        \n",
+      "{'loss': 0.009, 'learning_rate': 0.00013222222222222221, 'epoch': 82.0}         \n",
+      "{'loss': 0.0088, 'learning_rate': 0.00013111111111111111, 'epoch': 83.0}        \n",
+      "{'loss': 0.0087, 'learning_rate': 0.00013000000000000002, 'epoch': 84.0}        \n",
+      "{'loss': 0.0083, 'learning_rate': 0.00012888888888888892, 'epoch': 85.0}        \n",
+      "{'loss': 0.0083, 'learning_rate': 0.00012777777777777776, 'epoch': 86.0}        \n",
+      "{'loss': 0.0077, 'learning_rate': 0.00012666666666666666, 'epoch': 87.0}        \n",
+      "{'loss': 0.0077, 'learning_rate': 0.00012555555555555557, 'epoch': 88.0}        \n",
+      "{'loss': 0.0075, 'learning_rate': 0.00012444444444444444, 'epoch': 89.0}        \n",
+      "{'loss': 0.0074, 'learning_rate': 0.00012333333333333334, 'epoch': 90.0}        \n",
+      "{'loss': 0.0072, 'learning_rate': 0.00012222222222222224, 'epoch': 91.0}        \n",
+      "{'loss': 0.0073, 'learning_rate': 0.0001211111111111111, 'epoch': 92.0}         \n",
+      "{'loss': 0.0068, 'learning_rate': 0.00012, 'epoch': 93.0}                       \n",
+      "{'loss': 0.0068, 'learning_rate': 0.00011888888888888889, 'epoch': 94.0}        \n",
+      "{'loss': 0.0067, 'learning_rate': 0.00011777777777777779, 'epoch': 95.0}        \n",
+      "{'loss': 0.0066, 'learning_rate': 0.00011666666666666668, 'epoch': 96.0}        \n",
+      "{'loss': 0.0065, 'learning_rate': 0.00011555555555555555, 'epoch': 97.0}        \n",
+      "{'loss': 0.0065, 'learning_rate': 0.00011444444444444444, 'epoch': 98.0}        \n",
+      "{'loss': 0.0062, 'learning_rate': 0.00011333333333333334, 'epoch': 99.0}        \n",
+      "{'loss': 0.0062, 'learning_rate': 0.00011222222222222223, 'epoch': 100.0}       \n",
+      "{'loss': 0.0064, 'learning_rate': 0.00011111111111111112, 'epoch': 101.0}       \n",
+      "{'loss': 0.006, 'learning_rate': 0.00011000000000000002, 'epoch': 102.0}        \n",
+      "{'loss': 0.0061, 'learning_rate': 0.00010888888888888889, 'epoch': 103.0}       \n",
+      "{'loss': 0.0061, 'learning_rate': 0.00010777777777777778, 'epoch': 104.0}       \n",
+      "{'loss': 0.0061, 'learning_rate': 0.00010666666666666667, 'epoch': 105.0}       \n",
+      "{'loss': 0.0062, 'learning_rate': 0.00010555555555555557, 'epoch': 106.0}       \n",
+      "{'loss': 0.006, 'learning_rate': 0.00010444444444444445, 'epoch': 107.0}        \n",
+      "{'loss': 0.0061, 'learning_rate': 0.00010333333333333334, 'epoch': 108.0}       \n",
+      "{'loss': 0.0059, 'learning_rate': 0.00010222222222222222, 'epoch': 109.0}       \n",
+      "{'loss': 0.0059, 'learning_rate': 0.00010111111111111112, 'epoch': 110.0}       \n",
+      "{'loss': 0.0059, 'learning_rate': 0.0001, 'epoch': 111.0}                       \n",
+      "{'loss': 0.0058, 'learning_rate': 9.888888888888889e-05, 'epoch': 112.0}        \n",
+      "{'loss': 0.0058, 'learning_rate': 9.777777777777778e-05, 'epoch': 113.0}        \n",
+      "{'loss': 0.0057, 'learning_rate': 9.666666666666667e-05, 'epoch': 114.0}        \n",
+      "{'loss': 0.0057, 'learning_rate': 9.555555555555557e-05, 'epoch': 115.0}        \n",
+      "{'loss': 0.0057, 'learning_rate': 9.444444444444444e-05, 'epoch': 116.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 9.333333333333334e-05, 'epoch': 117.0}        \n",
+      "{'loss': 0.0057, 'learning_rate': 9.222222222222223e-05, 'epoch': 118.0}        \n",
+      "{'loss': 0.0057, 'learning_rate': 9.111111111111112e-05, 'epoch': 119.0}        \n",
+      "{'loss': 0.0057, 'learning_rate': 9e-05, 'epoch': 120.0}                        \n",
+      "{'loss': 0.0057, 'learning_rate': 8.888888888888889e-05, 'epoch': 121.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 8.777777777777778e-05, 'epoch': 122.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 8.666666666666667e-05, 'epoch': 123.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 8.555555555555556e-05, 'epoch': 124.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 8.444444444444444e-05, 'epoch': 125.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 8.333333333333334e-05, 'epoch': 126.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 8.222222222222222e-05, 'epoch': 127.0}        \n",
+      "{'loss': 0.0056, 'learning_rate': 8.111111111111112e-05, 'epoch': 128.0}        \n",
+      "{'loss': 0.0054, 'learning_rate': 8e-05, 'epoch': 129.0}                        \n",
+      "{'loss': 0.0055, 'learning_rate': 7.88888888888889e-05, 'epoch': 130.0}         \n",
+      "{'loss': 0.0055, 'learning_rate': 7.777777777777778e-05, 'epoch': 131.0}        \n",
+      "{'loss': 0.0055, 'learning_rate': 7.666666666666667e-05, 'epoch': 132.0}        \n",
+      "{'loss': 0.0054, 'learning_rate': 7.555555555555556e-05, 'epoch': 133.0}        \n",
+      "{'loss': 0.0055, 'learning_rate': 7.444444444444444e-05, 'epoch': 134.0}        \n",
+      "{'loss': 0.0055, 'learning_rate': 7.333333333333333e-05, 'epoch': 135.0}        \n",
+      "{'loss': 0.0055, 'learning_rate': 7.222222222222222e-05, 'epoch': 136.0}        \n",
+      "{'loss': 0.0054, 'learning_rate': 7.111111111111112e-05, 'epoch': 137.0}        \n",
+      "{'loss': 0.0054, 'learning_rate': 7e-05, 'epoch': 138.0}                        \n",
+      "{'loss': 0.0053, 'learning_rate': 6.88888888888889e-05, 'epoch': 139.0}         \n",
+      "{'loss': 0.0053, 'learning_rate': 6.777777777777778e-05, 'epoch': 140.0}        \n",
+      "{'loss': 0.0055, 'learning_rate': 6.666666666666667e-05, 'epoch': 141.0}        \n",
+      "{'loss': 0.0053, 'learning_rate': 6.555555555555556e-05, 'epoch': 142.0}        \n",
+      "{'loss': 0.0054, 'learning_rate': 6.444444444444446e-05, 'epoch': 143.0}        \n",
+      "{'loss': 0.0054, 'learning_rate': 6.333333333333333e-05, 'epoch': 144.0}        \n",
+      "{'loss': 0.0052, 'learning_rate': 6.222222222222222e-05, 'epoch': 145.0}        \n",
+      "{'loss': 0.0053, 'learning_rate': 6.111111111111112e-05, 'epoch': 146.0}        \n",
+      "{'loss': 0.0053, 'learning_rate': 6e-05, 'epoch': 147.0}                        \n",
+      "{'loss': 0.0052, 'learning_rate': 5.8888888888888896e-05, 'epoch': 148.0}       \n",
+      "{'loss': 0.0051, 'learning_rate': 5.7777777777777776e-05, 'epoch': 149.0}       \n",
+      "{'loss': 0.0053, 'learning_rate': 5.666666666666667e-05, 'epoch': 150.0}        \n",
+      "{'loss': 0.0052, 'learning_rate': 5.555555555555556e-05, 'epoch': 151.0}        \n",
+      "{'loss': 0.0053, 'learning_rate': 5.4444444444444446e-05, 'epoch': 152.0}       \n",
+      "{'loss': 0.0053, 'learning_rate': 5.333333333333333e-05, 'epoch': 153.0}        \n",
+      "{'loss': 0.0052, 'learning_rate': 5.222222222222223e-05, 'epoch': 154.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 5.111111111111111e-05, 'epoch': 155.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 5e-05, 'epoch': 156.0}                        \n",
+      "{'loss': 0.0053, 'learning_rate': 4.888888888888889e-05, 'epoch': 157.0}        \n",
+      "{'loss': 0.0053, 'learning_rate': 4.7777777777777784e-05, 'epoch': 158.0}       \n",
+      "{'loss': 0.0052, 'learning_rate': 4.666666666666667e-05, 'epoch': 159.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 4.555555555555556e-05, 'epoch': 160.0}        \n",
+      "{'loss': 0.0053, 'learning_rate': 4.4444444444444447e-05, 'epoch': 161.0}       \n",
+      "{'loss': 0.0052, 'learning_rate': 4.3333333333333334e-05, 'epoch': 162.0}       \n",
+      "{'loss': 0.0051, 'learning_rate': 4.222222222222222e-05, 'epoch': 163.0}        \n",
+      "{'loss': 0.0052, 'learning_rate': 4.111111111111111e-05, 'epoch': 164.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 4e-05, 'epoch': 165.0}                        \n",
+      "{'loss': 0.0053, 'learning_rate': 3.888888888888889e-05, 'epoch': 166.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 3.777777777777778e-05, 'epoch': 167.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 3.6666666666666666e-05, 'epoch': 168.0}       \n",
+      "{'loss': 0.0051, 'learning_rate': 3.555555555555556e-05, 'epoch': 169.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 3.444444444444445e-05, 'epoch': 170.0}         \n",
+      "{'loss': 0.005, 'learning_rate': 3.3333333333333335e-05, 'epoch': 171.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 3.222222222222223e-05, 'epoch': 172.0}        \n",
+      "{'loss': 0.0051, 'learning_rate': 3.111111111111111e-05, 'epoch': 173.0}        \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'loss': 0.0051, 'learning_rate': 3e-05, 'epoch': 174.0}                        \n",
+      "{'loss': 0.0051, 'learning_rate': 2.8888888888888888e-05, 'epoch': 175.0}       \n",
+      "{'loss': 0.005, 'learning_rate': 2.777777777777778e-05, 'epoch': 176.0}         \n",
+      "{'loss': 0.0052, 'learning_rate': 2.6666666666666667e-05, 'epoch': 177.0}       \n",
+      "{'loss': 0.005, 'learning_rate': 2.5555555555555554e-05, 'epoch': 178.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 2.4444444444444445e-05, 'epoch': 179.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 2.3333333333333336e-05, 'epoch': 180.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 2.2222222222222223e-05, 'epoch': 181.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 2.111111111111111e-05, 'epoch': 182.0}         \n",
+      "{'loss': 0.005, 'learning_rate': 2e-05, 'epoch': 183.0}                         \n",
+      "{'loss': 0.0049, 'learning_rate': 1.888888888888889e-05, 'epoch': 184.0}        \n",
+      "{'loss': 0.0049, 'learning_rate': 1.777777777777778e-05, 'epoch': 185.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 1.6666666666666667e-05, 'epoch': 186.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 1.5555555555555555e-05, 'epoch': 187.0}        \n",
+      "{'loss': 0.0049, 'learning_rate': 1.4444444444444444e-05, 'epoch': 188.0}       \n",
+      "{'loss': 0.005, 'learning_rate': 1.3333333333333333e-05, 'epoch': 189.0}        \n",
+      "{'loss': 0.0049, 'learning_rate': 1.2222222222222222e-05, 'epoch': 190.0}       \n",
+      "{'loss': 0.0049, 'learning_rate': 1.1111111111111112e-05, 'epoch': 191.0}       \n",
+      "{'loss': 0.0049, 'learning_rate': 1e-05, 'epoch': 192.0}                        \n",
+      "{'loss': 0.0049, 'learning_rate': 8.88888888888889e-06, 'epoch': 193.0}         \n",
+      "{'loss': 0.0048, 'learning_rate': 7.777777777777777e-06, 'epoch': 194.0}        \n",
+      "{'loss': 0.0049, 'learning_rate': 6.666666666666667e-06, 'epoch': 195.0}        \n",
+      "{'loss': 0.005, 'learning_rate': 5.555555555555556e-06, 'epoch': 196.0}         \n",
+      "{'loss': 0.005, 'learning_rate': 4.444444444444445e-06, 'epoch': 197.0}         \n",
+      "{'loss': 0.0049, 'learning_rate': 3.3333333333333333e-06, 'epoch': 198.0}       \n",
+      "{'loss': 0.0049, 'learning_rate': 2.2222222222222225e-06, 'epoch': 199.0}       \n",
+      "{'loss': 0.0048, 'learning_rate': 1.1111111111111112e-06, 'epoch': 200.0}       \n",
+      "{'train_runtime': 2631.4323, 'train_samples_per_second': 0.304, 'train_steps_per_second': 0.076, 'train_loss': 0.34155846770387144, 'epoch': 200.0}\n",
+      "100%|█████████████████████████████████████████| 200/200 [43:51<00:00, 13.16s/it]\n",
+      "> \u001b[1mINFO    Finished training, saving model...\u001b[0m\n",
+      "CPU times: user 12.4 s, sys: 4.81 s, total: 17.3 s\n",
+      "Wall time: 44min 7s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Launch AutoTrain LLM fine-tuning. All ${...} values are read from environment variables.\n",
+    "# --trainer falls back to `default` when TRAINER is not set in the environment, and the\n",
+    "# $( [[ ... ]] && echo ... ) fragments add a flag only when its variable equals \"True\".\n",
+    "!autotrain llm \\\n",
+    "--train \\\n",
+    "--model ${MODEL_NAME} \\\n",
+    "--project-name ${PROJECT_NAME} \\\n",
+    "--data-path data/ \\\n",
+    "--text-column text \\\n",
+    "--lr ${LEARNING_RATE} \\\n",
+    "--batch-size ${BATCH_SIZE} \\\n",
+    "--epochs ${NUM_EPOCHS} \\\n",
+    "--block-size ${BLOCK_SIZE} \\\n",
+    "--warmup-ratio ${WARMUP_RATIO} \\\n",
+    "--lora-r ${LORA_R} \\\n",
+    "--lora-alpha ${LORA_ALPHA} \\\n",
+    "--lora-dropout ${LORA_DROPOUT} \\\n",
+    "--weight-decay ${WEIGHT_DECAY} \\\n",
+    "--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n",
+    "--trainer ${TRAINER:-default} \\\n",
+    "$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
+    "$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
+    "$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
+    "$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "id": "gdGQQoED1WSd"
+   },
+   "outputs": [],
+   "source": [
+    "# !mkdir data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "id": "J3_aYwtv5LtN"
+   },
+   "outputs": [],
+   "source": [
+    "# cd data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "id": "FtoUbYWR5RSD"
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Read the training CSV into a DataFrame for inspection.\n",
+    "train_csv_path = \"./data/train.csv\"\n",
+    "df = pd.read_csv(train_csv_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 536
+    },
+    "id": "Vg9SeMOf8Zh5",
+    "outputId": "4d86182d-dd4e-4397-cdf9-b86c3816cd4e"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(90, 3)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Description</th>\n",
+       "      <th>Relevances</th>\n",
+       "      <th>text</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Give the processor information</td>\n",
+       "      <td>vendor names of processors</td>\n",
+       "      <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>find the speed of processor</td>\n",
+       "      <td>speeds of processors</td>\n",
+       "      <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>find unique processor names</td>\n",
+       "      <td>unique values of vendor names of processors</td>\n",
+       "      <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>find name of operating system</td>\n",
+       "      <td>name of operating system</td>\n",
+       "      <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>find family names of processor</td>\n",
+       "      <td>family names of processors</td>\n",
+       "      <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                       Description  \\\n",
+       "0  Give the processor information    \n",
+       "1     find the speed of processor    \n",
+       "2      find unique processor names   \n",
+       "3    find name of operating system   \n",
+       "4   find family names of processor   \n",
+       "\n",
+       "                                    Relevances  \\\n",
+       "0                   vendor names of processors   \n",
+       "1                         speeds of processors   \n",
+       "2  unique values of vendor names of processors   \n",
+       "3                     name of operating system   \n",
+       "4                   family names of processors   \n",
+       "\n",
+       "                                                text  \n",
+       "0  <s>[INST] <<SYS>> Write the BigFixRelevance fo...  \n",
+       "1  <s>[INST] <<SYS>> Write the BigFixRelevance fo...  \n",
+       "2  <s>[INST] <<SYS>> Write the BigFixRelevance fo...  \n",
+       "3  <s>[INST] <<SYS>> Write the BigFixRelevance fo...  \n",
+       "4  <s>[INST] <<SYS>> Write the BigFixRelevance fo...  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "print(df.shape)\n",
+    "df.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "id": "ZxC7Rw7TFTPY"
+   },
+   "outputs": [],
+   "source": [
+    "from dataclasses import dataclass\n",
+    "from typing import Optional\n",
+    "\n",
+    "import torch\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig\n",
+    "\n",
+    "\n",
+    "@dataclass\n",
+    "class TextGenerationInference:\n",
+    "    \"\"\"Load a causal LM and its tokenizer from `model_path` and generate text with them.\n",
+    "\n",
+    "    Creating an instance immediately loads the model and tokenizer (see\n",
+    "    `__post_init__`) and builds `self.generation_config` from the fields below.\n",
+    "\n",
+    "    Fields:\n",
+    "        model_path: path or hub id given to both `from_pretrained` calls.\n",
+    "        use_int4 / use_int8: forwarded as `load_in_4bit` / `load_in_8bit`.\n",
+    "        temperature, top_k, top_p, repetition_penalty, num_return_sequences,\n",
+    "        num_beams, max_new_tokens, do_sample: copied into the GenerationConfig.\n",
+    "        offload_folder: forwarded to `from_pretrained(offload_folder=...)`.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    model_path: str = \"my_autotrain_llm_sys_temp_meta_llama_chat\"\n",
+    "    use_int4: Optional[bool] = False\n",
+    "    use_int8: Optional[bool] = False\n",
+    "    temperature: Optional[float] = 0.6\n",
+    "    top_k: Optional[int] = 50\n",
+    "    top_p: Optional[float] = 0.95\n",
+    "    repetition_penalty: Optional[float] = 1.0\n",
+    "    num_return_sequences: Optional[int] = 1\n",
+    "    num_beams: Optional[int] = 5\n",
+    "    max_new_tokens: Optional[int] = 1024\n",
+    "    do_sample: Optional[bool] = True\n",
+    "    # Machine-specific default kept for backward compatibility; override it\n",
+    "    # (for example with \"./data\") when running elsewhere.\n",
+    "    offload_folder: Optional[str] = \"/azusers/work/Hemant/data\"\n",
+    "\n",
+    "    def __post_init__(self):\n",
+    "        \"\"\"Load the model and tokenizer, then assemble the generation config.\"\"\"\n",
+    "        self.model = AutoModelForCausalLM.from_pretrained(\n",
+    "            self.model_path,\n",
+    "            load_in_4bit=self.use_int4,\n",
+    "            load_in_8bit=self.use_int8,\n",
+    "            torch_dtype=torch.float16,\n",
+    "            trust_remote_code=True,\n",
+    "            device_map=\"auto\",\n",
+    "            offload_folder=self.offload_folder,\n",
+    "        )\n",
+    "        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)\n",
+    "        self.model.eval()\n",
+    "        self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "        self.generation_config = GenerationConfig(\n",
+    "            temperature=self.temperature,\n",
+    "            top_k=self.top_k,\n",
+    "            top_p=self.top_p,\n",
+    "            repetition_penalty=self.repetition_penalty,\n",
+    "            num_return_sequences=self.num_return_sequences,\n",
+    "            num_beams=self.num_beams,\n",
+    "            eos_token_id=self.tokenizer.eos_token_id,\n",
+    "            do_sample=self.do_sample,\n",
+    "            # Only max_new_tokens is set: passing the same value as max_length as\n",
+    "            # well gave the config two conflicting length limits.\n",
+    "            max_new_tokens=self.max_new_tokens,\n",
+    "        )\n",
+    "\n",
+    "    def chat(self, prompt, num_beams=5, num_return_sequences=5):\n",
+    "        \"\"\"Generate completions for `prompt` and return them as decoded strings.\n",
+    "\n",
+    "        Args:\n",
+    "            prompt: text to tokenize (sent as a batch of one).\n",
+    "            num_beams: beam count for this call; defaults to 5 as before.\n",
+    "            num_return_sequences: sequences to return for this call; defaults to 5.\n",
+    "\n",
+    "        Returns:\n",
+    "            A list with one decoded string (special tokens skipped) per sequence\n",
+    "            returned by `generate`.\n",
+    "        \"\"\"\n",
+    "        inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n",
+    "\n",
+    "        # Pass the stored config so temperature/top_k/top_p/max_new_tokens/etc. are\n",
+    "        # actually used; the keyword arguments override it for this call only,\n",
+    "        # leaving self.generation_config unmodified.\n",
+    "        outputs = self.model.generate(\n",
+    "            **inputs,\n",
+    "            generation_config=self.generation_config,\n",
+    "            num_beams=num_beams,\n",
+    "            num_return_sequences=num_return_sequences,\n",
+    "        )\n",
+    "\n",
+    "        # Decode each of the returned sequences\n",
+    "        return [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]\n",
+    "\n",
+    "\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 538,
+     "referenced_widgets": [
+      "35c2527c83f94eb983b1ae730ed91747",
+      "2392776c72b94666abf1fd5f62549b87",
+      "1c57bab4fd2e47f1b90a3d6758fa77cb",
+      "ef48f3ab7d2c4cefbb4f6227d61d2424",
+      "9e13b105fc8a4207969a41055d0dae4e",
+      "b8e4bd68d27a4d6386814fede84da99e",
+      "94c0c32442fe47fb926be879602cdd1c",
+      "3b8f2fec5e3d4ab69855adf5cd7b0ffb",
+      "73a820a723814b8aba81c89bac03716e",
+      "1a95698310fa41f89c221f7d775fadf7",
+      "8dc5f8affc6e43329e63112a4e552fff"
+     ]
+    },
+    "id": "DVwk_COZaU5e",
+    "outputId": "ed811938-63d2-4fac-9a19-fc9029f0e0f6"
+   },
+   "outputs": [],
+   "source": [
+    "# %%time\n",
+    "# inference = TextGenerationInference()  # Create an instance with default settings\n",
+    "\n",
+    "# prompt = \"\"\"\n",
+    "# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+    "\n",
+    "# ### Instruction:\n",
+    "# Continue the story based on the given starting sentence.\n",
+    "\n",
+    "# ### Input:\n",
+    "# Once upon a time,\n",
+    "# \"\"\"\n",
+    "\n",
+    "# response = inference.chat(prompt)\n",
+    "# print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "torch.cuda.empty_cache()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7c38ada366954e7aa5d6456793d72b87",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "You shouldn't move a model when it is dispatched on multiple devices.\n"
+     ]
+    },
+    {
+     "ename": "RuntimeError",
+     "evalue": "You can't move a model that has some modules offloaded to cpu or disk.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "File \u001b[0;32m<timed exec>:1\u001b[0m\n",
+      "File \u001b[0;32m<string>:14\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, model_path, use_int4, use_int8, temperature, top_k, top_p, repetition_penalty, num_return_sequences, num_beams, max_new_tokens, do_sample)\u001b[0m\n",
+      "Cell \u001b[0;32mIn[14], line 23\u001b[0m, in \u001b[0;36mTextGenerationInference.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__post_init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 23\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     24\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     25\u001b[0m \u001b[43m        \u001b[49m\u001b[43mload_in_4bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int4\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     26\u001b[0m \u001b[43m        \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int8\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     27\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtorch_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat16\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     28\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m     29\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mauto\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m     30\u001b[0m \u001b[43m        
\u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/azusers/work/Hemant/data\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m   \u001b[49m\u001b[38;5;66;43;03m# \"./data\"\u001b[39;49;00m\n\u001b[1;32m     31\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     32\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_path, trust_remote_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m     33\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39meval()\n",
+      "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:563\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m    561\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m    562\u001b[0m     model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 563\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    564\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m    565\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    566\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    567\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    568\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    569\u001b[0m )\n",
+      "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/modeling_utils.py:3253\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m   3250\u001b[0m     model \u001b[38;5;241m=\u001b[39m quantizer\u001b[38;5;241m.\u001b[39mpost_init_model(model)\n\u001b[1;32m   3252\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _adapter_model_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 3253\u001b[0m     \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_adapter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   3254\u001b[0m \u001b[43m        \u001b[49m\u001b[43m_adapter_model_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3255\u001b[0m \u001b[43m        \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3256\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3257\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3258\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_loading_info:\n\u001b[1;32m   3261\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m loading_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:180\u001b[0m, in \u001b[0;36mPeftAdapterMixin.load_adapter\u001b[0;34m(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m    174\u001b[0m \u001b[38;5;66;03m# Re-dispatch model and hooks in case the model is offloaded to CPU / Disk.\u001b[39;00m\n\u001b[1;32m    175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m    176\u001b[0m     (\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhf_device_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m    177\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m (\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhf_device_map\u001b[38;5;241m.\u001b[39mvalues())\u001b[38;5;241m.\u001b[39mintersection({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m})) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m    178\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_config) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m    179\u001b[0m ):\n\u001b[0;32m--> 180\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dispatch_accelerate_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    181\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    182\u001b[0m \u001b[43m      
  \u001b[49m\u001b[43mmax_memory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_memory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    183\u001b[0m \u001b[43m        \u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    184\u001b[0m \u001b[43m        \u001b[49m\u001b[43moffload_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    185\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:390\u001b[0m, in \u001b[0;36mPeftAdapterMixin._dispatch_accelerate_model\u001b[0;34m(self, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m    386\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(device_map, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m    387\u001b[0m     device_map \u001b[38;5;241m=\u001b[39m infer_auto_device_map(\n\u001b[1;32m    388\u001b[0m         \u001b[38;5;28mself\u001b[39m, max_memory\u001b[38;5;241m=\u001b[39mmax_memory, no_split_module_classes\u001b[38;5;241m=\u001b[39mno_split_module_classes\n\u001b[1;32m    389\u001b[0m     )\n\u001b[0;32m--> 390\u001b[0m \u001b[43mdispatch_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    391\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    392\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    393\u001b[0m \u001b[43m    \u001b[49m\u001b[43moffload_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    394\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdispatch_model_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    395\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:426\u001b[0m, in \u001b[0;36mdispatch_model\u001b[0;34m(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)\u001b[0m\n\u001b[1;32m    424\u001b[0m device \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(device_map\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m device \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 426\u001b[0m     \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    428\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    429\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    430\u001b[0m     )\n",
+      "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:415\u001b[0m, in \u001b[0;36mdispatch_model.<locals>.add_warning.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    413\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m param \u001b[38;5;129;01min\u001b[39;00m model\u001b[38;5;241m.\u001b[39mparameters():\n\u001b[1;32m    414\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m param\u001b[38;5;241m.\u001b[39mdevice \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmeta\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 415\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt move a model that has some modules offloaded to cpu or disk.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+      "\u001b[0;31mRuntimeError\u001b[0m: You can't move a model that has some modules offloaded to cpu or disk."
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "inference = TextGenerationInference()  # Create an instance with default settings\n",
+    "\n",
+    "# Prompt 1\n",
+    "# prompt = \"\"\"\n",
+    "# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+    "\n",
+    "# ### Instruction:\n",
+    "# Write the relevance for the given input description.\n",
+    "\n",
+    "# ### Input:\n",
+    "# Find out about the specified mapped drive\n",
+    "# \"\"\"\n",
+    "\n",
+    "# Prompt 2\n",
+    "# prompt = \"\"\"\n",
+    "# ### Instruction:\n",
+    "# Write the relevance for the given input description.\n",
+    "\n",
+    "# ### Input:\n",
+    "# Find out about the specified mapped drive.\n",
+    "# \"\"\"\n",
+    "\n",
+    "# Prompt 3\n",
+    "# The prompt starts directly with <s>, like the rows of the training `text`\n",
+    "# column shown by df.head(); a newline before <s> would not match that layout.\n",
+    "prompt = \"\"\"<s>[INST] <<SYS>> Write the BigFixRelevance for the following description: Give the processor information <</SYS>> [/INST]\n",
+    "\"\"\"\n",
+    "\n",
+    "# chat() returns a list of decoded candidate completions.\n",
+    "response = inference.chat(prompt)\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "relative_path = \"./data\"\n",
+    "absolute_path = os.path.abspath(relative_path)\n",
+    "print(absolute_path)"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python (whatever you want to call it)",
+   "language": "python",
+   "name": "envname"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "1a95698310fa41f89c221f7d775fadf7": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "1c57bab4fd2e47f1b90a3d6758fa77cb": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_3b8f2fec5e3d4ab69855adf5cd7b0ffb",
+      "max": 10,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_73a820a723814b8aba81c89bac03716e",
+      "value": 10
+     }
+    },
+    "2392776c72b94666abf1fd5f62549b87": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b8e4bd68d27a4d6386814fede84da99e",
+      "placeholder": "",
+      "style": "IPY_MODEL_94c0c32442fe47fb926be879602cdd1c",
+      "value": "Loading checkpoint shards: 100%"
+     }
+    },
+    "35c2527c83f94eb983b1ae730ed91747": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_2392776c72b94666abf1fd5f62549b87",
+       "IPY_MODEL_1c57bab4fd2e47f1b90a3d6758fa77cb",
+       "IPY_MODEL_ef48f3ab7d2c4cefbb4f6227d61d2424"
+      ],
+      "layout": "IPY_MODEL_9e13b105fc8a4207969a41055d0dae4e"
+     }
+    },
+    "3b8f2fec5e3d4ab69855adf5cd7b0ffb": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "73a820a723814b8aba81c89bac03716e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "8dc5f8affc6e43329e63112a4e552fff": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "94c0c32442fe47fb926be879602cdd1c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "9e13b105fc8a4207969a41055d0dae4e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b8e4bd68d27a4d6386814fede84da99e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "ef48f3ab7d2c4cefbb4f6227d61d2424": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_1a95698310fa41f89c221f7d775fadf7",
+      "placeholder": "",
+      "style": "IPY_MODEL_8dc5f8affc6e43329e63112a4e552fff",
+      "value": " 10/10 [02:43&lt;00:00, 12.78s/it]"
+     }
+    }
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}