{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JvMRbVLEJlZT", "outputId": "306df70b-0c75-4781-a75b-d10957a028a3" }, "outputs": [], "source": [ "# #@title 🤗 AutoTrain LLM\n", "# #@markdown In order to use this colab\n", "# #@markdown - upload train.csv to a folder named `data/`\n", "# #@markdown - train.csv must contain a `text` column\n", "# #@markdown - choose a project name if you wish\n", "# #@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n", "# #@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n", "# #@markdown - update hyperparameters if you wish\n", "# #@markdown - click `Runtime > Run all` or run each cell individually\n", "\n", "import os\n", "# !pip install -U autotrain-advanced > install_logs.txt\n", "# !autotrain setup > setup_logs.txt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# pip install transformers --upgrad\n", "# !pip install torch --upgrade\n", "# !pip install tokenizers --upgrade" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "A2-_lkBS1WKA" }, "outputs": [], "source": [ "#@markdown ---\n", "#@markdown #### Project Config\n", "#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n", "project_name = 'my_autotrain_llm_sys_temp_meta_llama_chat' # @param {type:\"string\"}\n", "model_name = \"meta-llama/Llama-2-7b-chat-hf\" # 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n", "\n", "#@markdown ---\n", "#@markdown #### Push to Hub?\n", "#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account\n", "#@markdown If you dont use these, the model will be saved in Google Colab and you are required to download it manually.\n", "#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n", "#@markdown You can find your token here: https://huggingface.co/settings/tokens\n", "push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n", "hf_token = \"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\" #@param {type:\"string\"}\n", "repo_id = \"hemantk089/llm_fine_tuning\" #@param {type:\"string\"}\n", "\n", "#@markdown ---\n", "#@markdown #### Hyperparameters\n", "learning_rate = 2e-4 # @param {type:\"number\"}\n", "num_epochs = 200 #@param {type:\"number\"}\n", "batch_size = 1 # @param {type:\"slider\", min:1, max:32, step:1}\n", "block_size = 1024 # @param {type:\"number\"}\n", "trainer = \"sft\" # @param [\"default\", \"sft\"] {type:\"raw\"}\n", "warmup_ratio = 0.1 # @param {type:\"number\"}\n", "weight_decay = 0.01 # @param {type:\"number\"}\n", "gradient_accumulation = 4 # @param {type:\"number\"}\n", "use_fp16 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n", "use_peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n", "use_int4 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n", "lora_r = 16 #@param {type:\"number\"}\n", "lora_alpha = 32 #@param {type:\"number\"}\n", "lora_dropout = 0.05 #@param {type:\"number\"}\n", "\n", "os.environ[\"PROJECT_NAME\"] = project_name\n", "os.environ[\"MODEL_NAME\"] = model_name\n", "os.environ[\"PUSH_TO_HUB\"] = str(push_to_hub)\n", "os.environ[\"HF_TOKEN\"] = hf_token\n", "os.environ[\"REPO_ID\"] = repo_id\n", "os.environ[\"LEARNING_RATE\"] = str(learning_rate)\n", "os.environ[\"NUM_EPOCHS\"] = str(num_epochs)\n", "os.environ[\"BATCH_SIZE\"] = str(batch_size)\n", "os.environ[\"BLOCK_SIZE\"] = str(block_size)\n", "os.environ[\"WARMUP_RATIO\"] = str(warmup_ratio)\n", "os.environ[\"WEIGHT_DECAY\"] = str(weight_decay)\n", "os.environ[\"GRADIENT_ACCUMULATION\"] = str(gradient_accumulation)\n", "os.environ[\"USE_FP16\"] = str(use_fp16)\n", "os.environ[\"USE_PEFT\"] = str(use_peft)\n", "os.environ[\"USE_INT4\"] = str(use_int4)\n", "os.environ[\"LORA_R\"] = str(lora_r)\n", "os.environ[\"LORA_ALPHA\"] = str(lora_alpha)\n", "os.environ[\"LORA_DROPOUT\"] = str(lora_dropout)\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "g3cd_ED_yXXt", "outputId": "d753c017-cf19-4822-b8ea-c9e6b70fc2d1" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2023-10-04 14:41:59,153] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "> \u001b[1mINFO Running LLM\u001b[0m\n", "> \u001b[1mINFO Params: Namespace(add_eos_token=False, auto_find_batch_size=False, backend='default', block_size=1024, data_path='data/', deploy=False, evaluation_strategy='epoch', fp16=True, func=, gradient_accumulation_steps=4, inference=False, learning_rate=0.0002, logging_steps=-1, lora_alpha=32, lora_dropout=0.05, lora_r=16, max_grad_norm=1.0, merge_adapter=False, model='meta-llama/Llama-2-7b-chat-hf', model_max_length=1024, num_train_epochs=200, optimizer='adamw_torch', project_name='my_autotrain_llm_sys_temp_meta_llama_chat', push_to_hub=False, repo_id=None, save_strategy='epoch', save_total_limit=1, scheduler='linear', seed=42, target_modules=None, text_column='text', token=None, train=True, train_batch_size=1, train_split='train', trainer='default', use_flash_attention_2=False, use_int4=True, use_int8=False, use_peft=True, username=None, valid_split=None, version=False, warmup_ratio=0.1, weight_decay=0.01)\u001b[0m\n", "> \u001b[1mINFO loading dataset from csv\u001b[0m\n", "Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.\n", "Using pad_token, but it is not set yet.\n", "Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00, 1.78s/it]\n", "You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32000. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc\n", "Running tokenizer on train dataset: 100%|█| 90/90 [00:00<00:00, 8654.20 examples\n", "Grouping texts in chunks of 1024 (num_proc=4): 100%|█| 90/90 [00:00<00:00, 729.7\n", "> \u001b[1mINFO creating trainer\u001b[0m\n", "{'loss': 2.5454, 'learning_rate': 1e-05, 'epoch': 1.0} \n", "{'loss': 2.5454, 'learning_rate': 2e-05, 'epoch': 2.0} \n", "{'loss': 2.5344, 'learning_rate': 3e-05, 'epoch': 3.0} \n", "{'loss': 2.505, 'learning_rate': 4e-05, 'epoch': 4.0} \n", "{'loss': 2.4569, 'learning_rate': 5e-05, 'epoch': 5.0} \n", "{'loss': 2.3922, 'learning_rate': 6e-05, 'epoch': 6.0} \n", "{'loss': 2.3117, 'learning_rate': 7e-05, 'epoch': 7.0} \n", "{'loss': 2.2224, 'learning_rate': 8e-05, 'epoch': 8.0} \n", "{'loss': 2.1331, 'learning_rate': 9e-05, 'epoch': 9.0} \n", "{'loss': 2.0432, 'learning_rate': 0.0001, 'epoch': 10.0} \n", "{'loss': 1.9529, 'learning_rate': 0.00011000000000000002, 'epoch': 11.0} \n", "{'loss': 1.8601, 'learning_rate': 0.00012, 'epoch': 12.0} \n", "{'loss': 1.7627, 'learning_rate': 0.00013000000000000002, 'epoch': 13.0} \n", "{'loss': 1.6574, 'learning_rate': 0.00014, 'epoch': 14.0} \n", "{'loss': 1.5425, 'learning_rate': 0.00015000000000000001, 'epoch': 15.0} \n", "{'loss': 1.4224, 'learning_rate': 0.00016, 'epoch': 16.0} \n", "{'loss': 1.3218, 'learning_rate': 0.00017, 'epoch': 17.0} \n", "{'loss': 1.2318, 'learning_rate': 0.00018, 'epoch': 18.0} \n", "{'loss': 1.1835, 'learning_rate': 0.00019, 'epoch': 19.0} \n", "{'loss': 1.2394, 'learning_rate': 0.0002, 'epoch': 20.0} \n", "{'loss': 1.2134, 'learning_rate': 0.0001988888888888889, 'epoch': 21.0} \n", "{'loss': 1.1547, 'learning_rate': 0.00019777777777777778, 'epoch': 22.0} \n", "{'loss': 1.1043, 'learning_rate': 0.00019666666666666666, 'epoch': 23.0} \n", "{'loss': 1.0702, 'learning_rate': 0.00019555555555555556, 'epoch': 24.0} \n", "{'loss': 1.0691, 'learning_rate': 0.00019444444444444446, 'epoch': 25.0} \n", "{'loss': 1.043, 'learning_rate': 0.00019333333333333333, 'epoch': 26.0} \n", "{'loss': 1.0063, 'learning_rate': 0.00019222222222222224, 'epoch': 27.0} \n", "{'loss': 0.9849, 'learning_rate': 0.00019111111111111114, 'epoch': 28.0} \n", "{'loss': 0.9743, 'learning_rate': 0.00019, 'epoch': 29.0} \n", "{'loss': 0.9561, 'learning_rate': 0.00018888888888888888, 'epoch': 30.0} \n", "{'loss': 0.933, 'learning_rate': 0.00018777777777777779, 'epoch': 31.0} \n", "{'loss': 0.9131, 'learning_rate': 0.0001866666666666667, 'epoch': 32.0} \n", "{'loss': 0.9, 'learning_rate': 0.00018555555555555556, 'epoch': 33.0} \n", "{'loss': 0.8814, 'learning_rate': 0.00018444444444444446, 'epoch': 34.0} \n", "{'loss': 0.8579, 'learning_rate': 0.00018333333333333334, 'epoch': 35.0} \n", "{'loss': 0.836, 'learning_rate': 0.00018222222222222224, 'epoch': 36.0} \n", "{'loss': 0.8163, 'learning_rate': 0.0001811111111111111, 'epoch': 37.0} \n", "{'loss': 0.7901, 'learning_rate': 0.00018, 'epoch': 38.0} \n", "{'loss': 0.7646, 'learning_rate': 0.0001788888888888889, 'epoch': 39.0} \n", "{'loss': 0.743, 'learning_rate': 0.00017777777777777779, 'epoch': 40.0} \n", "{'loss': 0.7167, 'learning_rate': 0.00017666666666666666, 'epoch': 41.0} \n", "{'loss': 0.6906, 'learning_rate': 0.00017555555555555556, 'epoch': 42.0} \n", "{'loss': 0.6648, 'learning_rate': 0.00017444444444444446, 'epoch': 43.0} \n", "{'loss': 0.6364, 'learning_rate': 0.00017333333333333334, 'epoch': 44.0} \n", "{'loss': 0.6111, 'learning_rate': 0.00017222222222222224, 'epoch': 45.0} \n", "{'loss': 0.5821, 'learning_rate': 0.0001711111111111111, 'epoch': 46.0} \n", "{'loss': 0.5554, 'learning_rate': 0.00017, 'epoch': 47.0} \n", "{'loss': 0.5251, 'learning_rate': 0.00016888888888888889, 'epoch': 48.0} \n", "{'loss': 0.4958, 'learning_rate': 0.0001677777777777778, 'epoch': 49.0} \n", "{'loss': 0.4603, 'learning_rate': 0.0001666666666666667, 'epoch': 50.0} \n", "{'loss': 0.4281, 'learning_rate': 0.00016555555555555556, 'epoch': 51.0} \n", "{'loss': 0.3929, 'learning_rate': 0.00016444444444444444, 'epoch': 52.0} \n", "{'loss': 0.3468, 'learning_rate': 0.00016333333333333334, 'epoch': 53.0} \n", "{'loss': 0.3018, 'learning_rate': 0.00016222222222222224, 'epoch': 54.0} \n", "{'loss': 0.2756, 'learning_rate': 0.00016222222222222224, 'epoch': 55.0} \n", "{'loss': 0.2626, 'learning_rate': 0.0001611111111111111, 'epoch': 56.0} \n", "{'loss': 0.2279, 'learning_rate': 0.00016, 'epoch': 57.0} \n", "{'loss': 0.2086, 'learning_rate': 0.0001588888888888889, 'epoch': 58.0} \n", "{'loss': 0.1972, 'learning_rate': 0.0001577777777777778, 'epoch': 59.0} \n", "{'loss': 0.179, 'learning_rate': 0.00015666666666666666, 'epoch': 60.0} \n", "{'loss': 0.156, 'learning_rate': 0.00015555555555555556, 'epoch': 61.0} \n", "{'loss': 0.1361, 'learning_rate': 0.00015444444444444446, 'epoch': 62.0} \n", "{'loss': 0.1177, 'learning_rate': 0.00015333333333333334, 'epoch': 63.0} \n", "{'loss': 0.1009, 'learning_rate': 0.0001522222222222222, 'epoch': 64.0} \n", "{'loss': 0.0834, 'learning_rate': 0.0001511111111111111, 'epoch': 65.0} \n", "{'loss': 0.0682, 'learning_rate': 0.00015000000000000001, 'epoch': 66.0} \n", "{'loss': 0.057, 'learning_rate': 0.0001488888888888889, 'epoch': 67.0} \n", "{'loss': 0.0469, 'learning_rate': 0.0001477777777777778, 'epoch': 68.0} \n", "{'loss': 0.0382, 'learning_rate': 0.00014666666666666666, 'epoch': 69.0} \n", "{'loss': 0.0327, 'learning_rate': 0.00014555555555555556, 'epoch': 70.0} \n", "{'loss': 0.0272, 'learning_rate': 0.00014444444444444444, 'epoch': 71.0} \n", "{'loss': 0.0222, 'learning_rate': 0.00014333333333333334, 'epoch': 72.0} \n", "{'loss': 0.0191, 'learning_rate': 0.00014222222222222224, 'epoch': 73.0} \n", "{'loss': 0.0165, 'learning_rate': 0.00014111111111111111, 'epoch': 74.0} \n", "{'loss': 0.0145, 'learning_rate': 0.00014, 'epoch': 75.0} \n", "{'loss': 0.0133, 'learning_rate': 0.0001388888888888889, 'epoch': 76.0} \n", "{'loss': 0.0122, 'learning_rate': 0.0001377777777777778, 'epoch': 77.0} \n", "{'loss': 0.011, 'learning_rate': 0.00013666666666666666, 'epoch': 78.0} \n", "{'loss': 0.0104, 'learning_rate': 0.00013555555555555556, 'epoch': 79.0} \n", "{'loss': 0.0099, 'learning_rate': 0.00013444444444444447, 'epoch': 80.0} \n", "{'loss': 0.0094, 'learning_rate': 0.00013333333333333334, 'epoch': 81.0} \n", "{'loss': 0.009, 'learning_rate': 0.00013222222222222221, 'epoch': 82.0} \n", "{'loss': 0.0088, 'learning_rate': 0.00013111111111111111, 'epoch': 83.0} \n", "{'loss': 0.0087, 'learning_rate': 0.00013000000000000002, 'epoch': 84.0} \n", "{'loss': 0.0083, 'learning_rate': 0.00012888888888888892, 'epoch': 85.0} \n", "{'loss': 0.0083, 'learning_rate': 0.00012777777777777776, 'epoch': 86.0} \n", "{'loss': 0.0077, 'learning_rate': 0.00012666666666666666, 'epoch': 87.0} \n", "{'loss': 0.0077, 'learning_rate': 0.00012555555555555557, 'epoch': 88.0} \n", "{'loss': 0.0075, 'learning_rate': 0.00012444444444444444, 'epoch': 89.0} \n", "{'loss': 0.0074, 'learning_rate': 0.00012333333333333334, 'epoch': 90.0} \n", "{'loss': 0.0072, 'learning_rate': 0.00012222222222222224, 'epoch': 91.0} \n", "{'loss': 0.0073, 'learning_rate': 0.0001211111111111111, 'epoch': 92.0} \n", "{'loss': 0.0068, 'learning_rate': 0.00012, 'epoch': 93.0} \n", "{'loss': 0.0068, 'learning_rate': 0.00011888888888888889, 'epoch': 94.0} \n", "{'loss': 0.0067, 'learning_rate': 0.00011777777777777779, 'epoch': 95.0} \n", "{'loss': 0.0066, 'learning_rate': 0.00011666666666666668, 'epoch': 96.0} \n", "{'loss': 0.0065, 'learning_rate': 0.00011555555555555555, 'epoch': 97.0} \n", "{'loss': 0.0065, 'learning_rate': 0.00011444444444444444, 'epoch': 98.0} \n", "{'loss': 0.0062, 'learning_rate': 0.00011333333333333334, 'epoch': 99.0} \n", "{'loss': 0.0062, 'learning_rate': 0.00011222222222222223, 'epoch': 100.0} \n", "{'loss': 0.0064, 'learning_rate': 0.00011111111111111112, 'epoch': 101.0} \n", "{'loss': 0.006, 'learning_rate': 0.00011000000000000002, 'epoch': 102.0} \n", "{'loss': 0.0061, 'learning_rate': 0.00010888888888888889, 'epoch': 103.0} \n", "{'loss': 0.0061, 'learning_rate': 0.00010777777777777778, 'epoch': 104.0} \n", "{'loss': 0.0061, 'learning_rate': 0.00010666666666666667, 'epoch': 105.0} \n", "{'loss': 0.0062, 'learning_rate': 0.00010555555555555557, 'epoch': 106.0} \n", "{'loss': 0.006, 'learning_rate': 0.00010444444444444445, 'epoch': 107.0} \n", "{'loss': 0.0061, 'learning_rate': 0.00010333333333333334, 'epoch': 108.0} \n", "{'loss': 0.0059, 'learning_rate': 0.00010222222222222222, 'epoch': 109.0} \n", "{'loss': 0.0059, 'learning_rate': 0.00010111111111111112, 'epoch': 110.0} \n", "{'loss': 0.0059, 'learning_rate': 0.0001, 'epoch': 111.0} \n", "{'loss': 0.0058, 'learning_rate': 9.888888888888889e-05, 'epoch': 112.0} \n", "{'loss': 0.0058, 'learning_rate': 9.777777777777778e-05, 'epoch': 113.0} \n", "{'loss': 0.0057, 'learning_rate': 9.666666666666667e-05, 'epoch': 114.0} \n", "{'loss': 0.0057, 'learning_rate': 9.555555555555557e-05, 'epoch': 115.0} \n", "{'loss': 0.0057, 'learning_rate': 9.444444444444444e-05, 'epoch': 116.0} \n", "{'loss': 0.0056, 'learning_rate': 9.333333333333334e-05, 'epoch': 117.0} \n", "{'loss': 0.0057, 'learning_rate': 9.222222222222223e-05, 'epoch': 118.0} \n", "{'loss': 0.0057, 'learning_rate': 9.111111111111112e-05, 'epoch': 119.0} \n", "{'loss': 0.0057, 'learning_rate': 9e-05, 'epoch': 120.0} \n", "{'loss': 0.0057, 'learning_rate': 8.888888888888889e-05, 'epoch': 121.0} \n", "{'loss': 0.0056, 'learning_rate': 8.777777777777778e-05, 'epoch': 122.0} \n", "{'loss': 0.0056, 'learning_rate': 8.666666666666667e-05, 'epoch': 123.0} \n", "{'loss': 0.0056, 'learning_rate': 8.555555555555556e-05, 'epoch': 124.0} \n", "{'loss': 0.0056, 'learning_rate': 8.444444444444444e-05, 'epoch': 125.0} \n", "{'loss': 0.0056, 'learning_rate': 8.333333333333334e-05, 'epoch': 126.0} \n", "{'loss': 0.0056, 'learning_rate': 8.222222222222222e-05, 'epoch': 127.0} \n", "{'loss': 0.0056, 'learning_rate': 8.111111111111112e-05, 'epoch': 128.0} \n", "{'loss': 0.0054, 'learning_rate': 8e-05, 'epoch': 129.0} \n", "{'loss': 0.0055, 'learning_rate': 7.88888888888889e-05, 'epoch': 130.0} \n", "{'loss': 0.0055, 'learning_rate': 7.777777777777778e-05, 'epoch': 131.0} \n", "{'loss': 0.0055, 'learning_rate': 7.666666666666667e-05, 'epoch': 132.0} \n", "{'loss': 0.0054, 'learning_rate': 7.555555555555556e-05, 'epoch': 133.0} \n", "{'loss': 0.0055, 'learning_rate': 7.444444444444444e-05, 'epoch': 134.0} \n", "{'loss': 0.0055, 'learning_rate': 7.333333333333333e-05, 'epoch': 135.0} \n", "{'loss': 0.0055, 'learning_rate': 7.222222222222222e-05, 'epoch': 136.0} \n", "{'loss': 0.0054, 'learning_rate': 7.111111111111112e-05, 'epoch': 137.0} \n", "{'loss': 0.0054, 'learning_rate': 7e-05, 'epoch': 138.0} \n", "{'loss': 0.0053, 'learning_rate': 6.88888888888889e-05, 'epoch': 139.0} \n", "{'loss': 0.0053, 'learning_rate': 6.777777777777778e-05, 'epoch': 140.0} \n", "{'loss': 0.0055, 'learning_rate': 6.666666666666667e-05, 'epoch': 141.0} \n", "{'loss': 0.0053, 'learning_rate': 6.555555555555556e-05, 'epoch': 142.0} \n", "{'loss': 0.0054, 'learning_rate': 6.444444444444446e-05, 'epoch': 143.0} \n", "{'loss': 0.0054, 'learning_rate': 6.333333333333333e-05, 'epoch': 144.0} \n", "{'loss': 0.0052, 'learning_rate': 6.222222222222222e-05, 'epoch': 145.0} \n", "{'loss': 0.0053, 'learning_rate': 6.111111111111112e-05, 'epoch': 146.0} \n", "{'loss': 0.0053, 'learning_rate': 6e-05, 'epoch': 147.0} \n", "{'loss': 0.0052, 'learning_rate': 5.8888888888888896e-05, 'epoch': 148.0} \n", "{'loss': 0.0051, 'learning_rate': 5.7777777777777776e-05, 'epoch': 149.0} \n", "{'loss': 0.0053, 'learning_rate': 5.666666666666667e-05, 'epoch': 150.0} \n", "{'loss': 0.0052, 'learning_rate': 5.555555555555556e-05, 'epoch': 151.0} \n", "{'loss': 0.0053, 'learning_rate': 5.4444444444444446e-05, 'epoch': 152.0} \n", "{'loss': 0.0053, 'learning_rate': 5.333333333333333e-05, 'epoch': 153.0} \n", "{'loss': 0.0052, 'learning_rate': 5.222222222222223e-05, 'epoch': 154.0} \n", "{'loss': 0.0051, 'learning_rate': 5.111111111111111e-05, 'epoch': 155.0} \n", "{'loss': 0.0051, 'learning_rate': 5e-05, 'epoch': 156.0} \n", "{'loss': 0.0053, 'learning_rate': 4.888888888888889e-05, 'epoch': 157.0} \n", "{'loss': 0.0053, 'learning_rate': 4.7777777777777784e-05, 'epoch': 158.0} \n", "{'loss': 0.0052, 'learning_rate': 4.666666666666667e-05, 'epoch': 159.0} \n", "{'loss': 0.0051, 'learning_rate': 4.555555555555556e-05, 'epoch': 160.0} \n", "{'loss': 0.0053, 'learning_rate': 4.4444444444444447e-05, 'epoch': 161.0} \n", "{'loss': 0.0052, 'learning_rate': 4.3333333333333334e-05, 'epoch': 162.0} \n", "{'loss': 0.0051, 'learning_rate': 4.222222222222222e-05, 'epoch': 163.0} \n", "{'loss': 0.0052, 'learning_rate': 4.111111111111111e-05, 'epoch': 164.0} \n", "{'loss': 0.0051, 'learning_rate': 4e-05, 'epoch': 165.0} \n", "{'loss': 0.0053, 'learning_rate': 3.888888888888889e-05, 'epoch': 166.0} \n", "{'loss': 0.0051, 'learning_rate': 3.777777777777778e-05, 'epoch': 167.0} \n", "{'loss': 0.0051, 'learning_rate': 3.6666666666666666e-05, 'epoch': 168.0} \n", "{'loss': 0.0051, 'learning_rate': 3.555555555555556e-05, 'epoch': 169.0} \n", "{'loss': 0.005, 'learning_rate': 3.444444444444445e-05, 'epoch': 170.0} \n", "{'loss': 0.005, 'learning_rate': 3.3333333333333335e-05, 'epoch': 171.0} \n", "{'loss': 0.0051, 'learning_rate': 3.222222222222223e-05, 'epoch': 172.0} \n", "{'loss': 0.0051, 'learning_rate': 3.111111111111111e-05, 'epoch': 173.0} \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "{'loss': 0.0051, 'learning_rate': 3e-05, 'epoch': 174.0} \n", "{'loss': 0.0051, 'learning_rate': 2.8888888888888888e-05, 'epoch': 175.0} \n", "{'loss': 0.005, 'learning_rate': 2.777777777777778e-05, 'epoch': 176.0} \n", "{'loss': 0.0052, 'learning_rate': 2.6666666666666667e-05, 'epoch': 177.0} \n", "{'loss': 0.005, 'learning_rate': 2.5555555555555554e-05, 'epoch': 178.0} \n", "{'loss': 0.005, 'learning_rate': 2.4444444444444445e-05, 'epoch': 179.0} \n", "{'loss': 0.005, 'learning_rate': 2.3333333333333336e-05, 'epoch': 180.0} \n", "{'loss': 0.005, 'learning_rate': 2.2222222222222223e-05, 'epoch': 181.0} \n", "{'loss': 0.005, 'learning_rate': 2.111111111111111e-05, 'epoch': 182.0} \n", "{'loss': 0.005, 'learning_rate': 2e-05, 'epoch': 183.0} \n", "{'loss': 0.0049, 'learning_rate': 1.888888888888889e-05, 'epoch': 184.0} \n", "{'loss': 0.0049, 'learning_rate': 1.777777777777778e-05, 'epoch': 185.0} \n", "{'loss': 0.005, 'learning_rate': 1.6666666666666667e-05, 'epoch': 186.0} \n", "{'loss': 0.005, 'learning_rate': 1.5555555555555555e-05, 'epoch': 187.0} \n", "{'loss': 0.0049, 'learning_rate': 1.4444444444444444e-05, 'epoch': 188.0} \n", "{'loss': 0.005, 'learning_rate': 1.3333333333333333e-05, 'epoch': 189.0} \n", "{'loss': 0.0049, 'learning_rate': 1.2222222222222222e-05, 'epoch': 190.0} \n", "{'loss': 0.0049, 'learning_rate': 1.1111111111111112e-05, 'epoch': 191.0} \n", "{'loss': 0.0049, 'learning_rate': 1e-05, 'epoch': 192.0} \n", "{'loss': 0.0049, 'learning_rate': 8.88888888888889e-06, 'epoch': 193.0} \n", "{'loss': 0.0048, 'learning_rate': 7.777777777777777e-06, 'epoch': 194.0} \n", "{'loss': 0.0049, 'learning_rate': 6.666666666666667e-06, 'epoch': 195.0} \n", "{'loss': 0.005, 'learning_rate': 5.555555555555556e-06, 'epoch': 196.0} \n", "{'loss': 0.005, 'learning_rate': 4.444444444444445e-06, 'epoch': 197.0} \n", "{'loss': 0.0049, 'learning_rate': 3.3333333333333333e-06, 'epoch': 198.0} \n", "{'loss': 0.0049, 'learning_rate': 2.2222222222222225e-06, 'epoch': 199.0} \n", "{'loss': 0.0048, 'learning_rate': 1.1111111111111112e-06, 'epoch': 200.0} \n", "{'train_runtime': 2631.4323, 'train_samples_per_second': 0.304, 'train_steps_per_second': 0.076, 'train_loss': 0.34155846770387144, 'epoch': 200.0}\n", "100%|█████████████████████████████████████████| 200/200 [43:51<00:00, 13.16s/it]\n", "> \u001b[1mINFO Finished training, saving model...\u001b[0m\n", "CPU times: user 12.4 s, sys: 4.81 s, total: 17.3 s\n", "Wall time: 44min 7s\n" ] } ], "source": [ "%%time\n", "!autotrain llm \\\n", "--train \\\n", "--model ${MODEL_NAME} \\\n", "--project-name ${PROJECT_NAME} \\\n", "--data-path data/ \\\n", "--text-column text \\\n", "--lr ${LEARNING_RATE} \\\n", "--batch-size ${BATCH_SIZE} \\\n", "--epochs ${NUM_EPOCHS} \\\n", "--block-size ${BLOCK_SIZE} \\\n", "--warmup-ratio ${WARMUP_RATIO} \\\n", "--lora-r ${LORA_R} \\\n", "--lora-alpha ${LORA_ALPHA} \\\n", "--lora-dropout ${LORA_DROPOUT} \\\n", "--weight-decay ${WEIGHT_DECAY} \\\n", "--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n", "$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n", "$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n", "$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n", "$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "gdGQQoED1WSd" }, "outputs": [], "source": [ "# !mkdir data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "J3_aYwtv5LtN" }, "outputs": [], "source": [ "# cd data" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "FtoUbYWR5RSD" }, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv('./data/train.csv')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 536 }, "id": "Vg9SeMOf8Zh5", "outputId": "4d86182d-dd4e-4397-cdf9-b86c3816cd4e" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(90, 3)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DescriptionRelevancestext
0Give the processor informationvendor names of processors<s>[INST] <<SYS>> Write the BigFixRelevance fo...
1find the speed of processorspeeds of processors<s>[INST] <<SYS>> Write the BigFixRelevance fo...
2find unique processor namesunique values of vendor names of processors<s>[INST] <<SYS>> Write the BigFixRelevance fo...
3find name of operating systemname of operating system<s>[INST] <<SYS>> Write the BigFixRelevance fo...
4find family names of processorfamily names of processors<s>[INST] <<SYS>> Write the BigFixRelevance fo...
\n", "
" ], "text/plain": [ " Description \\\n", "0 Give the processor information \n", "1 find the speed of processor \n", "2 find unique processor names \n", "3 find name of operating system \n", "4 find family names of processor \n", "\n", " Relevances \\\n", "0 vendor names of processors \n", "1 speeds of processors \n", "2 unique values of vendor names of processors \n", "3 name of operating system \n", "4 family names of processors \n", "\n", " text \n", "0 [INST] <> Write the BigFixRelevance fo... \n", "1 [INST] <> Write the BigFixRelevance fo... \n", "2 [INST] <> Write the BigFixRelevance fo... \n", "3 [INST] <> Write the BigFixRelevance fo... \n", "4 [INST] <> Write the BigFixRelevance fo... " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(df.shape)\n", "df.head(5)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "ZxC7Rw7TFTPY" }, "outputs": [], "source": [ "from dataclasses import dataclass\n", "from typing import Optional\n", "\n", "import torch\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig\n", "\n", "\n", "@dataclass\n", "class TextGenerationInference:\n", " model_path: str = \"my_autotrain_llm_sys_temp_meta_llama_chat\"\n", " use_int4: Optional[bool] = False\n", " use_int8: Optional[bool] = False\n", " temperature: Optional[float] = 0.6\n", " top_k: Optional[int] = 50\n", " top_p: Optional[float] = 0.95\n", " repetition_penalty: Optional[float] = 1.0\n", " num_return_sequences: Optional[int] = 1\n", " num_beams: Optional[int] = 5\n", " max_new_tokens: Optional[int] = 1024\n", " do_sample: Optional[bool] = True\n", "\n", " def __post_init__(self):\n", " self.model = AutoModelForCausalLM.from_pretrained(\n", " self.model_path,\n", " load_in_4bit=self.use_int4,\n", " load_in_8bit=self.use_int8,\n", " torch_dtype=torch.float16,\n", " trust_remote_code=True,\n", " device_map=\"auto\",\n", " offload_folder=\"/azusers/work/Hemant/data\" # \"./data\"\n", " )\n", " self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)\n", " self.model.eval()\n", " self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", " self.generation_config = GenerationConfig(\n", " temperature=self.temperature,\n", " top_k=self.top_k,\n", " top_p=self.top_p,\n", " repetition_penalty=self.repetition_penalty,\n", " num_return_sequences=self.num_return_sequences,\n", " num_beams=self.num_beams,\n", " max_length=self.max_new_tokens,\n", " eos_token_id=self.tokenizer.eos_token_id,\n", " do_sample=self.do_sample,\n", " max_new_tokens=self.max_new_tokens,\n", " )\n", "\n", "# def chat(self, prompt):\n", "# inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n", "# outputs = self.model.generate(**inputs, generation_config=self.generation_config)\n", "# return self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", " \n", "# def chat(self, prompt):\n", "# inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n", "\n", "# # Ensure the generation config uses beam search and returns 5 sequences\n", "# self.generation_config['num_beams'] = 5\n", "# self.generation_config['num_return_sequences'] = 5\n", "\n", "# outputs = self.model.generate(**inputs, **self.generation_config)\n", "\n", "# # Decode each of the returned sequences\n", "# responses = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]\n", "# return responses\n", " \n", " def chat(self, prompt):\n", " inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n", "\n", " # Ensure the generation config uses beam search and returns 5 sequences\n", " setattr(self.generation_config, 'num_beams', 5)\n", " setattr(self.generation_config, 'num_return_sequences', 5)\n", "\n", " outputs = self.model.generate(\n", " **inputs, \n", " num_beams=self.generation_config.num_beams, \n", " num_return_sequences=self.generation_config.num_return_sequences\n", " )\n", "\n", " # Decode each of the returned sequences\n", " responses = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]\n", " return responses\n", "\n", "\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 538, "referenced_widgets": [ "35c2527c83f94eb983b1ae730ed91747", "2392776c72b94666abf1fd5f62549b87", "1c57bab4fd2e47f1b90a3d6758fa77cb", "ef48f3ab7d2c4cefbb4f6227d61d2424", "9e13b105fc8a4207969a41055d0dae4e", "b8e4bd68d27a4d6386814fede84da99e", "94c0c32442fe47fb926be879602cdd1c", "3b8f2fec5e3d4ab69855adf5cd7b0ffb", "73a820a723814b8aba81c89bac03716e", "1a95698310fa41f89c221f7d775fadf7", "8dc5f8affc6e43329e63112a4e552fff" ] }, "id": "DVwk_COZaU5e", "outputId": "ed811938-63d2-4fac-9a19-fc9029f0e0f6" }, "outputs": [], "source": [ "# %%time\n", "# inference = TextGenerationInference() # Create an instance with default settings\n", "\n", "# prompt = \"\"\"\n", "# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", "\n", "# ### Instruction:\n", "# Continue the story based on the given starting sentence.\n", "\n", "# ### Input:\n", "# Once upon a time,\n", "# \"\"\"\n", "\n", "# response = inference.chat(prompt)\n", "# print(response)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "import torch\n", "torch.cuda.empty_cache()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7c38ada366954e7aa5d6456793d72b87", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00:1\u001b[0m\n", "File \u001b[0;32m:14\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, model_path, use_int4, use_int8, temperature, top_k, top_p, repetition_penalty, num_return_sequences, num_beams, max_new_tokens, do_sample)\u001b[0m\n", "Cell \u001b[0;32mIn[14], line 23\u001b[0m, in \u001b[0;36mTextGenerationInference.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__post_init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 23\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_in_4bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int4\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int8\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43mtorch_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat16\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mauto\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/azusers/work/Hemant/data\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"./data\"\u001b[39;49;00m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_path, trust_remote_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39meval()\n", "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:563\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 562\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 563\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 564\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 567\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m )\n", "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/modeling_utils.py:3253\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3250\u001b[0m model \u001b[38;5;241m=\u001b[39m quantizer\u001b[38;5;241m.\u001b[39mpost_init_model(model)\n\u001b[1;32m 3252\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _adapter_model_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 3253\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_adapter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3254\u001b[0m \u001b[43m \u001b[49m\u001b[43m_adapter_model_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3255\u001b[0m \u001b[43m \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3256\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3257\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3258\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_loading_info:\n\u001b[1;32m 3261\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m loading_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:180\u001b[0m, in \u001b[0;36mPeftAdapterMixin.load_adapter\u001b[0;34m(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# Re-dispatch model and hooks in case the model is offloaded to CPU / Disk.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 176\u001b[0m (\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhf_device_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 177\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhf_device_map\u001b[38;5;241m.\u001b[39mvalues())\u001b[38;5;241m.\u001b[39mintersection({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m})) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 178\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_config) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 179\u001b[0m ):\n\u001b[0;32m--> 180\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dispatch_accelerate_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 181\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_memory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_memory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 183\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:390\u001b[0m, in \u001b[0;36mPeftAdapterMixin._dispatch_accelerate_model\u001b[0;34m(self, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(device_map, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 387\u001b[0m device_map \u001b[38;5;241m=\u001b[39m infer_auto_device_map(\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28mself\u001b[39m, max_memory\u001b[38;5;241m=\u001b[39mmax_memory, no_split_module_classes\u001b[38;5;241m=\u001b[39mno_split_module_classes\n\u001b[1;32m 389\u001b[0m )\n\u001b[0;32m--> 390\u001b[0m \u001b[43mdispatch_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 394\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdispatch_model_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 395\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:426\u001b[0m, in \u001b[0;36mdispatch_model\u001b[0;34m(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)\u001b[0m\n\u001b[1;32m 424\u001b[0m device \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(device_map\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m device \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 426\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 429\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 430\u001b[0m )\n", "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:415\u001b[0m, in \u001b[0;36mdispatch_model..add_warning..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m param \u001b[38;5;129;01min\u001b[39;00m model\u001b[38;5;241m.\u001b[39mparameters():\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m param\u001b[38;5;241m.\u001b[39mdevice \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmeta\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 415\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt move a model that has some modules offloaded to cpu or disk.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "\u001b[0;31mRuntimeError\u001b[0m: You can't move a model that has some modules offloaded to cpu or disk." ] } ], "source": [ "%%time\n", "inference = TextGenerationInference() # Create an instance with default settings\n", "\n", "# Prompt 1\n", "# prompt = \"\"\"\n", "# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", "\n", "# ### Instruction:\n", "# Write the relevance for the given input description.\n", "\n", "# ### Input:\n", "# Find out about the specified mapped drive\n", "# \"\"\"\n", "\n", "# Prompt 2\n", "# prompt = \"\"\"\n", "# ### Instruction:\n", "# Write the relevance for the given input description.\n", "\n", "# ### Input:\n", "# Find out about the specified mapped drive.\n", "# \"\"\"\n", "\n", "# Prompt 3\n", "prompt = \"\"\"\n", "[INST] <> Write the BigFixRelevance for the following description: Give the processor information <> [/INST]\n", "\"\"\"\n", "\n", "response = inference.chat(prompt)\n", "print(response)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "relative_path = \"./data\"\n", "absolute_path = os.path.abspath(relative_path)\n", "print(absolute_path)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "Python (whatever you want to call it)", "language": "python", "name": "envname" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "1a95698310fa41f89c221f7d775fadf7": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1c57bab4fd2e47f1b90a3d6758fa77cb": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3b8f2fec5e3d4ab69855adf5cd7b0ffb", "max": 10, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_73a820a723814b8aba81c89bac03716e", "value": 10 } }, "2392776c72b94666abf1fd5f62549b87": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b8e4bd68d27a4d6386814fede84da99e", "placeholder": "​", "style": "IPY_MODEL_94c0c32442fe47fb926be879602cdd1c", "value": "Loading checkpoint shards: 100%" } }, "35c2527c83f94eb983b1ae730ed91747": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2392776c72b94666abf1fd5f62549b87", "IPY_MODEL_1c57bab4fd2e47f1b90a3d6758fa77cb", "IPY_MODEL_ef48f3ab7d2c4cefbb4f6227d61d2424" ], "layout": "IPY_MODEL_9e13b105fc8a4207969a41055d0dae4e" } }, "3b8f2fec5e3d4ab69855adf5cd7b0ffb": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "73a820a723814b8aba81c89bac03716e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8dc5f8affc6e43329e63112a4e552fff": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "94c0c32442fe47fb926be879602cdd1c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9e13b105fc8a4207969a41055d0dae4e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b8e4bd68d27a4d6386814fede84da99e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ef48f3ab7d2c4cefbb4f6227d61d2424": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1a95698310fa41f89c221f7d775fadf7", "placeholder": "​", "style": "IPY_MODEL_8dc5f8affc6e43329e63112a4e552fff", "value": " 10/10 [02:43<00:00, 12.78s/it]" } } } } }, "nbformat": 4, "nbformat_minor": 1 }