hemantk089 committed on
Commit
b347b77
1 Parent(s): 964de79

Upload AutoTrain_LLM-meta-llama(Llama-2-7b-chat-hf)-sys_template.ipynb

Browse files
AutoTrain_LLM-meta-llama(Llama-2-7b-chat-hf)-sys_template.ipynb ADDED
@@ -0,0 +1,1123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {
7
+ "colab": {
8
+ "base_uri": "https://localhost:8080/"
9
+ },
10
+ "id": "JvMRbVLEJlZT",
11
+ "outputId": "306df70b-0c75-4781-a75b-d10957a028a3"
12
+ },
13
+ "outputs": [],
14
+ "source": [
15
+ "# #@title 🤗 AutoTrain LLM\n",
16
+ "# #@markdown In order to use this colab\n",
17
+ "# #@markdown - upload train.csv to a folder named `data/`\n",
18
+ "# #@markdown - train.csv must contain a `text` column\n",
19
+ "# #@markdown - choose a project name if you wish\n",
20
+ "# #@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
21
+ "# #@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n",
22
+ "# #@markdown - update hyperparameters if you wish\n",
23
+ "# #@markdown - click `Runtime > Run all` or run each cell individually\n",
24
+ "\n",
25
+ "import os\n",
26
+ "# !pip install -U autotrain-advanced > install_logs.txt\n",
27
+ "# !autotrain setup > setup_logs.txt"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 2,
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "# !pip install transformers --upgrade\n",
37
+ "# !pip install torch --upgrade\n",
38
+ "# !pip install tokenizers --upgrade"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "metadata": {
45
+ "id": "A2-_lkBS1WKA"
46
+ },
47
+ "outputs": [],
48
+ "source": [
49
+ "#@markdown ---\n",
50
+ "#@markdown #### Project Config\n",
51
+ "#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n",
52
+ "project_name = 'my_autotrain_llm_sys_temp_meta_llama_chat' # @param {type:\"string\"}\n",
53
+ "model_name = \"meta-llama/Llama-2-7b-chat-hf\" # 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n",
54
+ "\n",
55
+ "#@markdown ---\n",
56
+ "#@markdown #### Push to Hub?\n",
57
+ "#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account\n",
58
+ "#@markdown If you don't use these, the model will be saved in Google Colab and you are required to download it manually.\n",
59
+ "#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n",
60
+ "#@markdown You can find your token here: https://huggingface.co/settings/tokens\n",
61
+ "push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
62
+ "hf_token = \"\" #@param {type:\"string\"}\n",
63
+ "repo_id = \"hemantk089/llm_fine_tuning\" #@param {type:\"string\"}\n",
64
+ "\n",
65
+ "#@markdown ---\n",
66
+ "#@markdown #### Hyperparameters\n",
67
+ "learning_rate = 2e-4 # @param {type:\"number\"}\n",
68
+ "num_epochs = 200 #@param {type:\"number\"}\n",
69
+ "batch_size = 1 # @param {type:\"slider\", min:1, max:32, step:1}\n",
70
+ "block_size = 1024 # @param {type:\"number\"}\n",
71
+ "trainer = \"sft\" # @param [\"default\", \"sft\"] {type:\"raw\"}\n",
72
+ "warmup_ratio = 0.1 # @param {type:\"number\"}\n",
73
+ "weight_decay = 0.01 # @param {type:\"number\"}\n",
74
+ "gradient_accumulation = 4 # @param {type:\"number\"}\n",
75
+ "use_fp16 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
76
+ "use_peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
77
+ "use_int4 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
78
+ "lora_r = 16 #@param {type:\"number\"}\n",
79
+ "lora_alpha = 32 #@param {type:\"number\"}\n",
80
+ "lora_dropout = 0.05 #@param {type:\"number\"}\n",
81
+ "\n",
82
+ "os.environ[\"PROJECT_NAME\"] = project_name\n",
83
+ "os.environ[\"MODEL_NAME\"] = model_name\n",
84
+ "os.environ[\"PUSH_TO_HUB\"] = str(push_to_hub)\n",
85
+ "os.environ[\"HF_TOKEN\"] = hf_token\n",
86
+ "os.environ[\"REPO_ID\"] = repo_id\n",
87
+ "os.environ[\"LEARNING_RATE\"] = str(learning_rate)\n",
88
+ "os.environ[\"NUM_EPOCHS\"] = str(num_epochs)\n",
89
+ "os.environ[\"BATCH_SIZE\"] = str(batch_size)\n",
90
+ "os.environ[\"BLOCK_SIZE\"] = str(block_size)\n",
91
+ "os.environ[\"WARMUP_RATIO\"] = str(warmup_ratio)\n",
92
+ "os.environ[\"WEIGHT_DECAY\"] = str(weight_decay)\n",
93
+ "os.environ[\"GRADIENT_ACCUMULATION\"] = str(gradient_accumulation)\n",
94
+ "os.environ[\"USE_FP16\"] = str(use_fp16)\n",
95
+ "os.environ[\"USE_PEFT\"] = str(use_peft)\n",
96
+ "os.environ[\"USE_INT4\"] = str(use_int4)\n",
97
+ "os.environ[\"LORA_R\"] = str(lora_r)\n",
98
+ "os.environ[\"LORA_ALPHA\"] = str(lora_alpha)\n",
99
+ "os.environ[\"LORA_DROPOUT\"] = str(lora_dropout)\n"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 4,
105
+ "metadata": {
106
+ "colab": {
107
+ "base_uri": "https://localhost:8080/"
108
+ },
109
+ "id": "g3cd_ED_yXXt",
110
+ "outputId": "d753c017-cf19-4822-b8ea-c9e6b70fc2d1"
111
+ },
112
+ "outputs": [
113
+ {
114
+ "name": "stdout",
115
+ "output_type": "stream",
116
+ "text": [
117
+ "[2023-10-04 14:41:59,153] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
118
+ "> \u001b[1mINFO Running LLM\u001b[0m\n",
119
+ "> \u001b[1mINFO Params: Namespace(add_eos_token=False, auto_find_batch_size=False, backend='default', block_size=1024, data_path='data/', deploy=False, evaluation_strategy='epoch', fp16=True, func=<function run_llm_command_factory at 0x1468523be1f0>, gradient_accumulation_steps=4, inference=False, learning_rate=0.0002, logging_steps=-1, lora_alpha=32, lora_dropout=0.05, lora_r=16, max_grad_norm=1.0, merge_adapter=False, model='meta-llama/Llama-2-7b-chat-hf', model_max_length=1024, num_train_epochs=200, optimizer='adamw_torch', project_name='my_autotrain_llm_sys_temp_meta_llama_chat', push_to_hub=False, repo_id=None, save_strategy='epoch', save_total_limit=1, scheduler='linear', seed=42, target_modules=None, text_column='text', token=None, train=True, train_batch_size=1, train_split='train', trainer='default', use_flash_attention_2=False, use_int4=True, use_int8=False, use_peft=True, username=None, valid_split=None, version=False, warmup_ratio=0.1, weight_decay=0.01)\u001b[0m\n",
120
+ "> \u001b[1mINFO loading dataset from csv\u001b[0m\n",
121
+ "Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.\n",
122
+ "Using pad_token, but it is not set yet.\n",
123
+ "Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00, 1.78s/it]\n",
124
+ "You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32000. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc\n",
125
+ "Running tokenizer on train dataset: 100%|█| 90/90 [00:00<00:00, 8654.20 examples\n",
126
+ "Grouping texts in chunks of 1024 (num_proc=4): 100%|█| 90/90 [00:00<00:00, 729.7\n",
127
+ "> \u001b[1mINFO creating trainer\u001b[0m\n",
128
+ "{'loss': 2.5454, 'learning_rate': 1e-05, 'epoch': 1.0} \n",
129
+ "{'loss': 2.5454, 'learning_rate': 2e-05, 'epoch': 2.0} \n",
130
+ "{'loss': 2.5344, 'learning_rate': 3e-05, 'epoch': 3.0} \n",
131
+ "{'loss': 2.505, 'learning_rate': 4e-05, 'epoch': 4.0} \n",
132
+ "{'loss': 2.4569, 'learning_rate': 5e-05, 'epoch': 5.0} \n",
133
+ "{'loss': 2.3922, 'learning_rate': 6e-05, 'epoch': 6.0} \n",
134
+ "{'loss': 2.3117, 'learning_rate': 7e-05, 'epoch': 7.0} \n",
135
+ "{'loss': 2.2224, 'learning_rate': 8e-05, 'epoch': 8.0} \n",
136
+ "{'loss': 2.1331, 'learning_rate': 9e-05, 'epoch': 9.0} \n",
137
+ "{'loss': 2.0432, 'learning_rate': 0.0001, 'epoch': 10.0} \n",
138
+ "{'loss': 1.9529, 'learning_rate': 0.00011000000000000002, 'epoch': 11.0} \n",
139
+ "{'loss': 1.8601, 'learning_rate': 0.00012, 'epoch': 12.0} \n",
140
+ "{'loss': 1.7627, 'learning_rate': 0.00013000000000000002, 'epoch': 13.0} \n",
141
+ "{'loss': 1.6574, 'learning_rate': 0.00014, 'epoch': 14.0} \n",
142
+ "{'loss': 1.5425, 'learning_rate': 0.00015000000000000001, 'epoch': 15.0} \n",
143
+ "{'loss': 1.4224, 'learning_rate': 0.00016, 'epoch': 16.0} \n",
144
+ "{'loss': 1.3218, 'learning_rate': 0.00017, 'epoch': 17.0} \n",
145
+ "{'loss': 1.2318, 'learning_rate': 0.00018, 'epoch': 18.0} \n",
146
+ "{'loss': 1.1835, 'learning_rate': 0.00019, 'epoch': 19.0} \n",
147
+ "{'loss': 1.2394, 'learning_rate': 0.0002, 'epoch': 20.0} \n",
148
+ "{'loss': 1.2134, 'learning_rate': 0.0001988888888888889, 'epoch': 21.0} \n",
149
+ "{'loss': 1.1547, 'learning_rate': 0.00019777777777777778, 'epoch': 22.0} \n",
150
+ "{'loss': 1.1043, 'learning_rate': 0.00019666666666666666, 'epoch': 23.0} \n",
151
+ "{'loss': 1.0702, 'learning_rate': 0.00019555555555555556, 'epoch': 24.0} \n",
152
+ "{'loss': 1.0691, 'learning_rate': 0.00019444444444444446, 'epoch': 25.0} \n",
153
+ "{'loss': 1.043, 'learning_rate': 0.00019333333333333333, 'epoch': 26.0} \n",
154
+ "{'loss': 1.0063, 'learning_rate': 0.00019222222222222224, 'epoch': 27.0} \n",
155
+ "{'loss': 0.9849, 'learning_rate': 0.00019111111111111114, 'epoch': 28.0} \n",
156
+ "{'loss': 0.9743, 'learning_rate': 0.00019, 'epoch': 29.0} \n",
157
+ "{'loss': 0.9561, 'learning_rate': 0.00018888888888888888, 'epoch': 30.0} \n",
158
+ "{'loss': 0.933, 'learning_rate': 0.00018777777777777779, 'epoch': 31.0} \n",
159
+ "{'loss': 0.9131, 'learning_rate': 0.0001866666666666667, 'epoch': 32.0} \n",
160
+ "{'loss': 0.9, 'learning_rate': 0.00018555555555555556, 'epoch': 33.0} \n",
161
+ "{'loss': 0.8814, 'learning_rate': 0.00018444444444444446, 'epoch': 34.0} \n",
162
+ "{'loss': 0.8579, 'learning_rate': 0.00018333333333333334, 'epoch': 35.0} \n",
163
+ "{'loss': 0.836, 'learning_rate': 0.00018222222222222224, 'epoch': 36.0} \n",
164
+ "{'loss': 0.8163, 'learning_rate': 0.0001811111111111111, 'epoch': 37.0} \n",
165
+ "{'loss': 0.7901, 'learning_rate': 0.00018, 'epoch': 38.0} \n",
166
+ "{'loss': 0.7646, 'learning_rate': 0.0001788888888888889, 'epoch': 39.0} \n",
167
+ "{'loss': 0.743, 'learning_rate': 0.00017777777777777779, 'epoch': 40.0} \n",
168
+ "{'loss': 0.7167, 'learning_rate': 0.00017666666666666666, 'epoch': 41.0} \n",
169
+ "{'loss': 0.6906, 'learning_rate': 0.00017555555555555556, 'epoch': 42.0} \n",
170
+ "{'loss': 0.6648, 'learning_rate': 0.00017444444444444446, 'epoch': 43.0} \n",
171
+ "{'loss': 0.6364, 'learning_rate': 0.00017333333333333334, 'epoch': 44.0} \n",
172
+ "{'loss': 0.6111, 'learning_rate': 0.00017222222222222224, 'epoch': 45.0} \n",
173
+ "{'loss': 0.5821, 'learning_rate': 0.0001711111111111111, 'epoch': 46.0} \n",
174
+ "{'loss': 0.5554, 'learning_rate': 0.00017, 'epoch': 47.0} \n",
175
+ "{'loss': 0.5251, 'learning_rate': 0.00016888888888888889, 'epoch': 48.0} \n",
176
+ "{'loss': 0.4958, 'learning_rate': 0.0001677777777777778, 'epoch': 49.0} \n",
177
+ "{'loss': 0.4603, 'learning_rate': 0.0001666666666666667, 'epoch': 50.0} \n",
178
+ "{'loss': 0.4281, 'learning_rate': 0.00016555555555555556, 'epoch': 51.0} \n",
179
+ "{'loss': 0.3929, 'learning_rate': 0.00016444444444444444, 'epoch': 52.0} \n",
180
+ "{'loss': 0.3468, 'learning_rate': 0.00016333333333333334, 'epoch': 53.0} \n",
181
+ "{'loss': 0.3018, 'learning_rate': 0.00016222222222222224, 'epoch': 54.0} \n",
182
+ "{'loss': 0.2756, 'learning_rate': 0.00016222222222222224, 'epoch': 55.0} \n",
183
+ "{'loss': 0.2626, 'learning_rate': 0.0001611111111111111, 'epoch': 56.0} \n",
184
+ "{'loss': 0.2279, 'learning_rate': 0.00016, 'epoch': 57.0} \n",
185
+ "{'loss': 0.2086, 'learning_rate': 0.0001588888888888889, 'epoch': 58.0} \n",
186
+ "{'loss': 0.1972, 'learning_rate': 0.0001577777777777778, 'epoch': 59.0} \n",
187
+ "{'loss': 0.179, 'learning_rate': 0.00015666666666666666, 'epoch': 60.0} \n",
188
+ "{'loss': 0.156, 'learning_rate': 0.00015555555555555556, 'epoch': 61.0} \n",
189
+ "{'loss': 0.1361, 'learning_rate': 0.00015444444444444446, 'epoch': 62.0} \n",
190
+ "{'loss': 0.1177, 'learning_rate': 0.00015333333333333334, 'epoch': 63.0} \n",
191
+ "{'loss': 0.1009, 'learning_rate': 0.0001522222222222222, 'epoch': 64.0} \n",
192
+ "{'loss': 0.0834, 'learning_rate': 0.0001511111111111111, 'epoch': 65.0} \n",
193
+ "{'loss': 0.0682, 'learning_rate': 0.00015000000000000001, 'epoch': 66.0} \n",
194
+ "{'loss': 0.057, 'learning_rate': 0.0001488888888888889, 'epoch': 67.0} \n",
195
+ "{'loss': 0.0469, 'learning_rate': 0.0001477777777777778, 'epoch': 68.0} \n",
196
+ "{'loss': 0.0382, 'learning_rate': 0.00014666666666666666, 'epoch': 69.0} \n",
197
+ "{'loss': 0.0327, 'learning_rate': 0.00014555555555555556, 'epoch': 70.0} \n",
198
+ "{'loss': 0.0272, 'learning_rate': 0.00014444444444444444, 'epoch': 71.0} \n",
199
+ "{'loss': 0.0222, 'learning_rate': 0.00014333333333333334, 'epoch': 72.0} \n",
200
+ "{'loss': 0.0191, 'learning_rate': 0.00014222222222222224, 'epoch': 73.0} \n",
201
+ "{'loss': 0.0165, 'learning_rate': 0.00014111111111111111, 'epoch': 74.0} \n",
202
+ "{'loss': 0.0145, 'learning_rate': 0.00014, 'epoch': 75.0} \n",
203
+ "{'loss': 0.0133, 'learning_rate': 0.0001388888888888889, 'epoch': 76.0} \n",
204
+ "{'loss': 0.0122, 'learning_rate': 0.0001377777777777778, 'epoch': 77.0} \n",
205
+ "{'loss': 0.011, 'learning_rate': 0.00013666666666666666, 'epoch': 78.0} \n",
206
+ "{'loss': 0.0104, 'learning_rate': 0.00013555555555555556, 'epoch': 79.0} \n",
207
+ "{'loss': 0.0099, 'learning_rate': 0.00013444444444444447, 'epoch': 80.0} \n",
208
+ "{'loss': 0.0094, 'learning_rate': 0.00013333333333333334, 'epoch': 81.0} \n",
209
+ "{'loss': 0.009, 'learning_rate': 0.00013222222222222221, 'epoch': 82.0} \n",
210
+ "{'loss': 0.0088, 'learning_rate': 0.00013111111111111111, 'epoch': 83.0} \n",
211
+ "{'loss': 0.0087, 'learning_rate': 0.00013000000000000002, 'epoch': 84.0} \n",
212
+ "{'loss': 0.0083, 'learning_rate': 0.00012888888888888892, 'epoch': 85.0} \n",
213
+ "{'loss': 0.0083, 'learning_rate': 0.00012777777777777776, 'epoch': 86.0} \n",
214
+ "{'loss': 0.0077, 'learning_rate': 0.00012666666666666666, 'epoch': 87.0} \n",
215
+ "{'loss': 0.0077, 'learning_rate': 0.00012555555555555557, 'epoch': 88.0} \n",
216
+ "{'loss': 0.0075, 'learning_rate': 0.00012444444444444444, 'epoch': 89.0} \n",
217
+ "{'loss': 0.0074, 'learning_rate': 0.00012333333333333334, 'epoch': 90.0} \n",
218
+ "{'loss': 0.0072, 'learning_rate': 0.00012222222222222224, 'epoch': 91.0} \n",
219
+ "{'loss': 0.0073, 'learning_rate': 0.0001211111111111111, 'epoch': 92.0} \n",
220
+ "{'loss': 0.0068, 'learning_rate': 0.00012, 'epoch': 93.0} \n",
221
+ "{'loss': 0.0068, 'learning_rate': 0.00011888888888888889, 'epoch': 94.0} \n",
222
+ "{'loss': 0.0067, 'learning_rate': 0.00011777777777777779, 'epoch': 95.0} \n",
223
+ "{'loss': 0.0066, 'learning_rate': 0.00011666666666666668, 'epoch': 96.0} \n",
224
+ "{'loss': 0.0065, 'learning_rate': 0.00011555555555555555, 'epoch': 97.0} \n",
225
+ "{'loss': 0.0065, 'learning_rate': 0.00011444444444444444, 'epoch': 98.0} \n",
226
+ "{'loss': 0.0062, 'learning_rate': 0.00011333333333333334, 'epoch': 99.0} \n",
227
+ "{'loss': 0.0062, 'learning_rate': 0.00011222222222222223, 'epoch': 100.0} \n",
228
+ "{'loss': 0.0064, 'learning_rate': 0.00011111111111111112, 'epoch': 101.0} \n",
229
+ "{'loss': 0.006, 'learning_rate': 0.00011000000000000002, 'epoch': 102.0} \n",
230
+ "{'loss': 0.0061, 'learning_rate': 0.00010888888888888889, 'epoch': 103.0} \n",
231
+ "{'loss': 0.0061, 'learning_rate': 0.00010777777777777778, 'epoch': 104.0} \n",
232
+ "{'loss': 0.0061, 'learning_rate': 0.00010666666666666667, 'epoch': 105.0} \n",
233
+ "{'loss': 0.0062, 'learning_rate': 0.00010555555555555557, 'epoch': 106.0} \n",
234
+ "{'loss': 0.006, 'learning_rate': 0.00010444444444444445, 'epoch': 107.0} \n",
235
+ "{'loss': 0.0061, 'learning_rate': 0.00010333333333333334, 'epoch': 108.0} \n",
236
+ "{'loss': 0.0059, 'learning_rate': 0.00010222222222222222, 'epoch': 109.0} \n",
237
+ "{'loss': 0.0059, 'learning_rate': 0.00010111111111111112, 'epoch': 110.0} \n",
238
+ "{'loss': 0.0059, 'learning_rate': 0.0001, 'epoch': 111.0} \n",
239
+ "{'loss': 0.0058, 'learning_rate': 9.888888888888889e-05, 'epoch': 112.0} \n",
240
+ "{'loss': 0.0058, 'learning_rate': 9.777777777777778e-05, 'epoch': 113.0} \n",
241
+ "{'loss': 0.0057, 'learning_rate': 9.666666666666667e-05, 'epoch': 114.0} \n",
242
+ "{'loss': 0.0057, 'learning_rate': 9.555555555555557e-05, 'epoch': 115.0} \n",
243
+ "{'loss': 0.0057, 'learning_rate': 9.444444444444444e-05, 'epoch': 116.0} \n",
244
+ "{'loss': 0.0056, 'learning_rate': 9.333333333333334e-05, 'epoch': 117.0} \n",
245
+ "{'loss': 0.0057, 'learning_rate': 9.222222222222223e-05, 'epoch': 118.0} \n",
246
+ "{'loss': 0.0057, 'learning_rate': 9.111111111111112e-05, 'epoch': 119.0} \n",
247
+ "{'loss': 0.0057, 'learning_rate': 9e-05, 'epoch': 120.0} \n",
248
+ "{'loss': 0.0057, 'learning_rate': 8.888888888888889e-05, 'epoch': 121.0} \n",
249
+ "{'loss': 0.0056, 'learning_rate': 8.777777777777778e-05, 'epoch': 122.0} \n",
250
+ "{'loss': 0.0056, 'learning_rate': 8.666666666666667e-05, 'epoch': 123.0} \n",
251
+ "{'loss': 0.0056, 'learning_rate': 8.555555555555556e-05, 'epoch': 124.0} \n",
252
+ "{'loss': 0.0056, 'learning_rate': 8.444444444444444e-05, 'epoch': 125.0} \n",
253
+ "{'loss': 0.0056, 'learning_rate': 8.333333333333334e-05, 'epoch': 126.0} \n",
254
+ "{'loss': 0.0056, 'learning_rate': 8.222222222222222e-05, 'epoch': 127.0} \n",
255
+ "{'loss': 0.0056, 'learning_rate': 8.111111111111112e-05, 'epoch': 128.0} \n",
256
+ "{'loss': 0.0054, 'learning_rate': 8e-05, 'epoch': 129.0} \n",
257
+ "{'loss': 0.0055, 'learning_rate': 7.88888888888889e-05, 'epoch': 130.0} \n",
258
+ "{'loss': 0.0055, 'learning_rate': 7.777777777777778e-05, 'epoch': 131.0} \n",
259
+ "{'loss': 0.0055, 'learning_rate': 7.666666666666667e-05, 'epoch': 132.0} \n",
260
+ "{'loss': 0.0054, 'learning_rate': 7.555555555555556e-05, 'epoch': 133.0} \n",
261
+ "{'loss': 0.0055, 'learning_rate': 7.444444444444444e-05, 'epoch': 134.0} \n",
262
+ "{'loss': 0.0055, 'learning_rate': 7.333333333333333e-05, 'epoch': 135.0} \n",
263
+ "{'loss': 0.0055, 'learning_rate': 7.222222222222222e-05, 'epoch': 136.0} \n",
264
+ "{'loss': 0.0054, 'learning_rate': 7.111111111111112e-05, 'epoch': 137.0} \n",
265
+ "{'loss': 0.0054, 'learning_rate': 7e-05, 'epoch': 138.0} \n",
266
+ "{'loss': 0.0053, 'learning_rate': 6.88888888888889e-05, 'epoch': 139.0} \n",
267
+ "{'loss': 0.0053, 'learning_rate': 6.777777777777778e-05, 'epoch': 140.0} \n",
268
+ "{'loss': 0.0055, 'learning_rate': 6.666666666666667e-05, 'epoch': 141.0} \n",
269
+ "{'loss': 0.0053, 'learning_rate': 6.555555555555556e-05, 'epoch': 142.0} \n",
270
+ "{'loss': 0.0054, 'learning_rate': 6.444444444444446e-05, 'epoch': 143.0} \n",
271
+ "{'loss': 0.0054, 'learning_rate': 6.333333333333333e-05, 'epoch': 144.0} \n",
272
+ "{'loss': 0.0052, 'learning_rate': 6.222222222222222e-05, 'epoch': 145.0} \n",
273
+ "{'loss': 0.0053, 'learning_rate': 6.111111111111112e-05, 'epoch': 146.0} \n",
274
+ "{'loss': 0.0053, 'learning_rate': 6e-05, 'epoch': 147.0} \n",
275
+ "{'loss': 0.0052, 'learning_rate': 5.8888888888888896e-05, 'epoch': 148.0} \n",
276
+ "{'loss': 0.0051, 'learning_rate': 5.7777777777777776e-05, 'epoch': 149.0} \n",
277
+ "{'loss': 0.0053, 'learning_rate': 5.666666666666667e-05, 'epoch': 150.0} \n",
278
+ "{'loss': 0.0052, 'learning_rate': 5.555555555555556e-05, 'epoch': 151.0} \n",
279
+ "{'loss': 0.0053, 'learning_rate': 5.4444444444444446e-05, 'epoch': 152.0} \n",
280
+ "{'loss': 0.0053, 'learning_rate': 5.333333333333333e-05, 'epoch': 153.0} \n",
281
+ "{'loss': 0.0052, 'learning_rate': 5.222222222222223e-05, 'epoch': 154.0} \n",
282
+ "{'loss': 0.0051, 'learning_rate': 5.111111111111111e-05, 'epoch': 155.0} \n",
283
+ "{'loss': 0.0051, 'learning_rate': 5e-05, 'epoch': 156.0} \n",
284
+ "{'loss': 0.0053, 'learning_rate': 4.888888888888889e-05, 'epoch': 157.0} \n",
285
+ "{'loss': 0.0053, 'learning_rate': 4.7777777777777784e-05, 'epoch': 158.0} \n",
286
+ "{'loss': 0.0052, 'learning_rate': 4.666666666666667e-05, 'epoch': 159.0} \n",
287
+ "{'loss': 0.0051, 'learning_rate': 4.555555555555556e-05, 'epoch': 160.0} \n",
288
+ "{'loss': 0.0053, 'learning_rate': 4.4444444444444447e-05, 'epoch': 161.0} \n",
289
+ "{'loss': 0.0052, 'learning_rate': 4.3333333333333334e-05, 'epoch': 162.0} \n",
290
+ "{'loss': 0.0051, 'learning_rate': 4.222222222222222e-05, 'epoch': 163.0} \n",
291
+ "{'loss': 0.0052, 'learning_rate': 4.111111111111111e-05, 'epoch': 164.0} \n",
292
+ "{'loss': 0.0051, 'learning_rate': 4e-05, 'epoch': 165.0} \n",
293
+ "{'loss': 0.0053, 'learning_rate': 3.888888888888889e-05, 'epoch': 166.0} \n",
294
+ "{'loss': 0.0051, 'learning_rate': 3.777777777777778e-05, 'epoch': 167.0} \n",
295
+ "{'loss': 0.0051, 'learning_rate': 3.6666666666666666e-05, 'epoch': 168.0} \n",
296
+ "{'loss': 0.0051, 'learning_rate': 3.555555555555556e-05, 'epoch': 169.0} \n",
297
+ "{'loss': 0.005, 'learning_rate': 3.444444444444445e-05, 'epoch': 170.0} \n",
298
+ "{'loss': 0.005, 'learning_rate': 3.3333333333333335e-05, 'epoch': 171.0} \n",
299
+ "{'loss': 0.0051, 'learning_rate': 3.222222222222223e-05, 'epoch': 172.0} \n",
300
+ "{'loss': 0.0051, 'learning_rate': 3.111111111111111e-05, 'epoch': 173.0} \n"
301
+ ]
302
+ },
303
+ {
304
+ "name": "stdout",
305
+ "output_type": "stream",
306
+ "text": [
307
+ "{'loss': 0.0051, 'learning_rate': 3e-05, 'epoch': 174.0} \n",
308
+ "{'loss': 0.0051, 'learning_rate': 2.8888888888888888e-05, 'epoch': 175.0} \n",
309
+ "{'loss': 0.005, 'learning_rate': 2.777777777777778e-05, 'epoch': 176.0} \n",
310
+ "{'loss': 0.0052, 'learning_rate': 2.6666666666666667e-05, 'epoch': 177.0} \n",
311
+ "{'loss': 0.005, 'learning_rate': 2.5555555555555554e-05, 'epoch': 178.0} \n",
312
+ "{'loss': 0.005, 'learning_rate': 2.4444444444444445e-05, 'epoch': 179.0} \n",
313
+ "{'loss': 0.005, 'learning_rate': 2.3333333333333336e-05, 'epoch': 180.0} \n",
314
+ "{'loss': 0.005, 'learning_rate': 2.2222222222222223e-05, 'epoch': 181.0} \n",
315
+ "{'loss': 0.005, 'learning_rate': 2.111111111111111e-05, 'epoch': 182.0} \n",
316
+ "{'loss': 0.005, 'learning_rate': 2e-05, 'epoch': 183.0} \n",
317
+ "{'loss': 0.0049, 'learning_rate': 1.888888888888889e-05, 'epoch': 184.0} \n",
318
+ "{'loss': 0.0049, 'learning_rate': 1.777777777777778e-05, 'epoch': 185.0} \n",
319
+ "{'loss': 0.005, 'learning_rate': 1.6666666666666667e-05, 'epoch': 186.0} \n",
320
+ "{'loss': 0.005, 'learning_rate': 1.5555555555555555e-05, 'epoch': 187.0} \n",
321
+ "{'loss': 0.0049, 'learning_rate': 1.4444444444444444e-05, 'epoch': 188.0} \n",
322
+ "{'loss': 0.005, 'learning_rate': 1.3333333333333333e-05, 'epoch': 189.0} \n",
323
+ "{'loss': 0.0049, 'learning_rate': 1.2222222222222222e-05, 'epoch': 190.0} \n",
324
+ "{'loss': 0.0049, 'learning_rate': 1.1111111111111112e-05, 'epoch': 191.0} \n",
325
+ "{'loss': 0.0049, 'learning_rate': 1e-05, 'epoch': 192.0} \n",
326
+ "{'loss': 0.0049, 'learning_rate': 8.88888888888889e-06, 'epoch': 193.0} \n",
327
+ "{'loss': 0.0048, 'learning_rate': 7.777777777777777e-06, 'epoch': 194.0} \n",
328
+ "{'loss': 0.0049, 'learning_rate': 6.666666666666667e-06, 'epoch': 195.0} \n",
329
+ "{'loss': 0.005, 'learning_rate': 5.555555555555556e-06, 'epoch': 196.0} \n",
330
+ "{'loss': 0.005, 'learning_rate': 4.444444444444445e-06, 'epoch': 197.0} \n",
331
+ "{'loss': 0.0049, 'learning_rate': 3.3333333333333333e-06, 'epoch': 198.0} \n",
332
+ "{'loss': 0.0049, 'learning_rate': 2.2222222222222225e-06, 'epoch': 199.0} \n",
333
+ "{'loss': 0.0048, 'learning_rate': 1.1111111111111112e-06, 'epoch': 200.0} \n",
334
+ "{'train_runtime': 2631.4323, 'train_samples_per_second': 0.304, 'train_steps_per_second': 0.076, 'train_loss': 0.34155846770387144, 'epoch': 200.0}\n",
335
+ "100%|█████████████████████████████████████████| 200/200 [43:51<00:00, 13.16s/it]\n",
336
+ "> \u001b[1mINFO Finished training, saving model...\u001b[0m\n",
337
+ "CPU times: user 12.4 s, sys: 4.81 s, total: 17.3 s\n",
338
+ "Wall time: 44min 7s\n"
339
+ ]
340
+ }
341
+ ],
342
+ "source": [
343
+ "%%time\n",
344
+ "!autotrain llm \\\n",
345
+ "--train \\\n",
346
+ "--model ${MODEL_NAME} \\\n",
347
+ "--project-name ${PROJECT_NAME} \\\n",
348
+ "--data-path data/ \\\n",
349
+ "--text-column text \\\n",
350
+ "--lr ${LEARNING_RATE} \\\n",
351
+ "--batch-size ${BATCH_SIZE} \\\n",
352
+ "--epochs ${NUM_EPOCHS} \\\n",
353
+ "--block-size ${BLOCK_SIZE} \\\n",
354
+ "--warmup-ratio ${WARMUP_RATIO} \\\n",
355
+ "--lora-r ${LORA_R} \\\n",
356
+ "--lora-alpha ${LORA_ALPHA} \\\n",
357
+ "--lora-dropout ${LORA_DROPOUT} \\\n",
358
+ "--weight-decay ${WEIGHT_DECAY} \\\n",
359
+ "--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n",
360
+ "$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
361
+ "$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
362
+ "$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
363
+ "$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": 5,
369
+ "metadata": {
370
+ "id": "gdGQQoED1WSd"
371
+ },
372
+ "outputs": [],
373
+ "source": [
374
+ "# !mkdir data"
375
+ ]
376
+ },
377
+ {
378
+ "cell_type": "code",
379
+ "execution_count": 6,
380
+ "metadata": {
381
+ "id": "J3_aYwtv5LtN"
382
+ },
383
+ "outputs": [],
384
+ "source": [
385
+ "# cd data"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "execution_count": 7,
391
+ "metadata": {
392
+ "id": "FtoUbYWR5RSD"
393
+ },
394
+ "outputs": [],
395
+ "source": [
396
+ "import pandas as pd\n",
397
+ "df = pd.read_csv('./data/train.csv')"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": 8,
403
+ "metadata": {
404
+ "colab": {
405
+ "base_uri": "https://localhost:8080/",
406
+ "height": 536
407
+ },
408
+ "id": "Vg9SeMOf8Zh5",
409
+ "outputId": "4d86182d-dd4e-4397-cdf9-b86c3816cd4e"
410
+ },
411
+ "outputs": [
412
+ {
413
+ "name": "stdout",
414
+ "output_type": "stream",
415
+ "text": [
416
+ "(90, 3)\n"
417
+ ]
418
+ },
419
+ {
420
+ "data": {
421
+ "text/html": [
422
+ "<div>\n",
423
+ "<style scoped>\n",
424
+ " .dataframe tbody tr th:only-of-type {\n",
425
+ " vertical-align: middle;\n",
426
+ " }\n",
427
+ "\n",
428
+ " .dataframe tbody tr th {\n",
429
+ " vertical-align: top;\n",
430
+ " }\n",
431
+ "\n",
432
+ " .dataframe thead th {\n",
433
+ " text-align: right;\n",
434
+ " }\n",
435
+ "</style>\n",
436
+ "<table border=\"1\" class=\"dataframe\">\n",
437
+ " <thead>\n",
438
+ " <tr style=\"text-align: right;\">\n",
439
+ " <th></th>\n",
440
+ " <th>Description</th>\n",
441
+ " <th>Relevances</th>\n",
442
+ " <th>text</th>\n",
443
+ " </tr>\n",
444
+ " </thead>\n",
445
+ " <tbody>\n",
446
+ " <tr>\n",
447
+ " <th>0</th>\n",
448
+ " <td>Give the processor information</td>\n",
449
+ " <td>vendor names of processors</td>\n",
450
+ " <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
451
+ " </tr>\n",
452
+ " <tr>\n",
453
+ " <th>1</th>\n",
454
+ " <td>find the speed of processor</td>\n",
455
+ " <td>speeds of processors</td>\n",
456
+ " <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
457
+ " </tr>\n",
458
+ " <tr>\n",
459
+ " <th>2</th>\n",
460
+ " <td>find unique processor names</td>\n",
461
+ " <td>unique values of vendor names of processors</td>\n",
462
+ " <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
463
+ " </tr>\n",
464
+ " <tr>\n",
465
+ " <th>3</th>\n",
466
+ " <td>find name of operating system</td>\n",
467
+ " <td>name of operating system</td>\n",
468
+ " <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
469
+ " </tr>\n",
470
+ " <tr>\n",
471
+ " <th>4</th>\n",
472
+ " <td>find family names of processor</td>\n",
473
+ " <td>family names of processors</td>\n",
474
+ " <td>&lt;s&gt;[INST] &lt;&lt;SYS&gt;&gt; Write the BigFixRelevance fo...</td>\n",
475
+ " </tr>\n",
476
+ " </tbody>\n",
477
+ "</table>\n",
478
+ "</div>"
479
+ ],
480
+ "text/plain": [
481
+ " Description \\\n",
482
+ "0 Give the processor information \n",
483
+ "1 find the speed of processor \n",
484
+ "2 find unique processor names \n",
485
+ "3 find name of operating system \n",
486
+ "4 find family names of processor \n",
487
+ "\n",
488
+ " Relevances \\\n",
489
+ "0 vendor names of processors \n",
490
+ "1 speeds of processors \n",
491
+ "2 unique values of vendor names of processors \n",
492
+ "3 name of operating system \n",
493
+ "4 family names of processors \n",
494
+ "\n",
495
+ " text \n",
496
+ "0 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
497
+ "1 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
498
+ "2 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
499
+ "3 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
500
+ "4 <s>[INST] <<SYS>> Write the BigFixRelevance fo... "
501
+ ]
502
+ },
503
+ "execution_count": 8,
504
+ "metadata": {},
505
+ "output_type": "execute_result"
506
+ }
507
+ ],
508
+ "source": [
509
+ "print(df.shape)\n",
510
+ "df.head(5)"
511
+ ]
512
+ },
513
+ {
514
+ "cell_type": "code",
515
+ "execution_count": 14,
516
+ "metadata": {
517
+ "id": "ZxC7Rw7TFTPY"
518
+ },
519
+ "outputs": [],
520
+ "source": [
521
+ "from dataclasses import dataclass\n",
522
+ "from typing import Optional\n",
523
+ "\n",
524
+ "import torch\n",
525
+ "from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig\n",
526
+ "\n",
527
+ "\n",
528
+ "@dataclass\n",
529
+ "class TextGenerationInference:\n",
530
+ " model_path: str = \"my_autotrain_llm_sys_temp_meta_llama_chat\"\n",
531
+ " use_int4: Optional[bool] = False\n",
532
+ " use_int8: Optional[bool] = False\n",
533
+ " temperature: Optional[float] = 0.6\n",
534
+ " top_k: Optional[int] = 50\n",
535
+ " top_p: Optional[float] = 0.95\n",
536
+ " repetition_penalty: Optional[float] = 1.0\n",
537
+ " num_return_sequences: Optional[int] = 1\n",
538
+ " num_beams: Optional[int] = 5\n",
539
+ " max_new_tokens: Optional[int] = 1024\n",
540
+ " do_sample: Optional[bool] = True\n",
541
+ "\n",
542
+ " def __post_init__(self):\n",
543
+ " self.model = AutoModelForCausalLM.from_pretrained(\n",
544
+ " self.model_path,\n",
545
+ " load_in_4bit=self.use_int4,\n",
546
+ " load_in_8bit=self.use_int8,\n",
547
+ " torch_dtype=torch.float16,\n",
548
+ " trust_remote_code=True,\n",
549
+ " device_map=\"auto\",\n",
550
+ " offload_folder=\"/azusers/work/Hemant/data\" # \"./data\"\n",
551
+ " )\n",
552
+ " self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)\n",
553
+ " self.model.eval()\n",
554
+ " self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
555
+ " self.generation_config = GenerationConfig(\n",
556
+ " temperature=self.temperature,\n",
557
+ " top_k=self.top_k,\n",
558
+ " top_p=self.top_p,\n",
559
+ " repetition_penalty=self.repetition_penalty,\n",
560
+ " num_return_sequences=self.num_return_sequences,\n",
561
+ " num_beams=self.num_beams,\n",
562
+ " max_length=self.max_new_tokens,\n",
563
+ " eos_token_id=self.tokenizer.eos_token_id,\n",
564
+ " do_sample=self.do_sample,\n",
565
+ " max_new_tokens=self.max_new_tokens,\n",
566
+ " )\n",
567
+ "\n",
568
+ "# def chat(self, prompt):\n",
569
+ "# inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n",
570
+ "# outputs = self.model.generate(**inputs, generation_config=self.generation_config)\n",
571
+ "# return self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
572
+ " \n",
573
+ "# def chat(self, prompt):\n",
574
+ "# inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n",
575
+ "\n",
576
+ "# # Ensure the generation config uses beam search and returns 5 sequences\n",
577
+ "# self.generation_config['num_beams'] = 5\n",
578
+ "# self.generation_config['num_return_sequences'] = 5\n",
579
+ "\n",
580
+ "# outputs = self.model.generate(**inputs, **self.generation_config)\n",
581
+ "\n",
582
+ "# # Decode each of the returned sequences\n",
583
+ "# responses = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]\n",
584
+ "# return responses\n",
585
+ " \n",
586
+ " def chat(self, prompt):\n",
587
+ " inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n",
588
+ "\n",
589
+ " # Ensure the generation config uses beam search and returns 5 sequences\n",
590
+ " setattr(self.generation_config, 'num_beams', 5)\n",
591
+ " setattr(self.generation_config, 'num_return_sequences', 5)\n",
592
+ "\n",
593
+ " outputs = self.model.generate(\n",
594
+ " **inputs, \n",
595
+ " num_beams=self.generation_config.num_beams, \n",
596
+ " num_return_sequences=self.generation_config.num_return_sequences\n",
597
+ " )\n",
598
+ "\n",
599
+ " # Decode each of the returned sequences\n",
600
+ " responses = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]\n",
601
+ " return responses\n",
602
+ "\n",
603
+ "\n",
604
+ " "
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "code",
609
+ "execution_count": null,
610
+ "metadata": {
611
+ "colab": {
612
+ "base_uri": "https://localhost:8080/",
613
+ "height": 538,
614
+ "referenced_widgets": [
615
+ "35c2527c83f94eb983b1ae730ed91747",
616
+ "2392776c72b94666abf1fd5f62549b87",
617
+ "1c57bab4fd2e47f1b90a3d6758fa77cb",
618
+ "ef48f3ab7d2c4cefbb4f6227d61d2424",
619
+ "9e13b105fc8a4207969a41055d0dae4e",
620
+ "b8e4bd68d27a4d6386814fede84da99e",
621
+ "94c0c32442fe47fb926be879602cdd1c",
622
+ "3b8f2fec5e3d4ab69855adf5cd7b0ffb",
623
+ "73a820a723814b8aba81c89bac03716e",
624
+ "1a95698310fa41f89c221f7d775fadf7",
625
+ "8dc5f8affc6e43329e63112a4e552fff"
626
+ ]
627
+ },
628
+ "id": "DVwk_COZaU5e",
629
+ "outputId": "ed811938-63d2-4fac-9a19-fc9029f0e0f6"
630
+ },
631
+ "outputs": [],
632
+ "source": [
633
+ "# %%time\n",
634
+ "# inference = TextGenerationInference() # Create an instance with default settings\n",
635
+ "\n",
636
+ "# prompt = \"\"\"\n",
637
+ "# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
638
+ "\n",
639
+ "# ### Instruction:\n",
640
+ "# Continue the story based on the given starting sentence.\n",
641
+ "\n",
642
+ "# ### Input:\n",
643
+ "# Once upon a time,\n",
644
+ "# \"\"\"\n",
645
+ "\n",
646
+ "# response = inference.chat(prompt)\n",
647
+ "# print(response)"
648
+ ]
649
+ },
650
+ {
651
+ "cell_type": "code",
652
+ "execution_count": 16,
653
+ "metadata": {},
654
+ "outputs": [],
655
+ "source": [
656
+ "import torch\n",
657
+ "torch.cuda.empty_cache()"
658
+ ]
659
+ },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 15,
663
+ "metadata": {},
664
+ "outputs": [
665
+ {
666
+ "data": {
667
+ "application/vnd.jupyter.widget-view+json": {
668
+ "model_id": "7c38ada366954e7aa5d6456793d72b87",
669
+ "version_major": 2,
670
+ "version_minor": 0
671
+ },
672
+ "text/plain": [
673
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
674
+ ]
675
+ },
676
+ "metadata": {},
677
+ "output_type": "display_data"
678
+ },
679
+ {
680
+ "name": "stderr",
681
+ "output_type": "stream",
682
+ "text": [
683
+ "You shouldn't move a model when it is dispatched on multiple devices.\n"
684
+ ]
685
+ },
686
+ {
687
+ "ename": "RuntimeError",
688
+ "evalue": "You can't move a model that has some modules offloaded to cpu or disk.",
689
+ "output_type": "error",
690
+ "traceback": [
691
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
692
+ "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
693
+ "File \u001b[0;32m<timed exec>:1\u001b[0m\n",
694
+ "File \u001b[0;32m<string>:14\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, model_path, use_int4, use_int8, temperature, top_k, top_p, repetition_penalty, num_return_sequences, num_beams, max_new_tokens, do_sample)\u001b[0m\n",
695
+ "Cell \u001b[0;32mIn[14], line 23\u001b[0m, in \u001b[0;36mTextGenerationInference.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__post_init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 23\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_in_4bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int4\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int8\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43mtorch_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat16\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mauto\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m 
\u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/azusers/work/Hemant/data\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"./data\"\u001b[39;49;00m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_path, trust_remote_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39meval()\n",
696
+ "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:563\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 562\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 563\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 564\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 567\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m )\n",
697
+ "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/modeling_utils.py:3253\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3250\u001b[0m model \u001b[38;5;241m=\u001b[39m quantizer\u001b[38;5;241m.\u001b[39mpost_init_model(model)\n\u001b[1;32m 3252\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _adapter_model_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 3253\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_adapter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3254\u001b[0m \u001b[43m \u001b[49m\u001b[43m_adapter_model_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3255\u001b[0m \u001b[43m \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3256\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3257\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3258\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_loading_info:\n\u001b[1;32m 3261\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m loading_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
698
+ "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:180\u001b[0m, in \u001b[0;36mPeftAdapterMixin.load_adapter\u001b[0;34m(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# Re-dispatch model and hooks in case the model is offloaded to CPU / Disk.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 176\u001b[0m (\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhf_device_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 177\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhf_device_map\u001b[38;5;241m.\u001b[39mvalues())\u001b[38;5;241m.\u001b[39mintersection({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m})) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 178\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_config) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 179\u001b[0m ):\n\u001b[0;32m--> 180\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dispatch_accelerate_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 181\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmax_memory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_memory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 183\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
699
+ "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:390\u001b[0m, in \u001b[0;36mPeftAdapterMixin._dispatch_accelerate_model\u001b[0;34m(self, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(device_map, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 387\u001b[0m device_map \u001b[38;5;241m=\u001b[39m infer_auto_device_map(\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28mself\u001b[39m, max_memory\u001b[38;5;241m=\u001b[39mmax_memory, no_split_module_classes\u001b[38;5;241m=\u001b[39mno_split_module_classes\n\u001b[1;32m 389\u001b[0m )\n\u001b[0;32m--> 390\u001b[0m \u001b[43mdispatch_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 394\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdispatch_model_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 395\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
700
+ "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:426\u001b[0m, in \u001b[0;36mdispatch_model\u001b[0;34m(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)\u001b[0m\n\u001b[1;32m 424\u001b[0m device \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(device_map\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m device \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 426\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 429\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 430\u001b[0m )\n",
701
+ "File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:415\u001b[0m, in \u001b[0;36mdispatch_model.<locals>.add_warning.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m param \u001b[38;5;129;01min\u001b[39;00m model\u001b[38;5;241m.\u001b[39mparameters():\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m param\u001b[38;5;241m.\u001b[39mdevice \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmeta\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 415\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt move a model that has some modules offloaded to cpu or disk.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
702
+ "\u001b[0;31mRuntimeError\u001b[0m: You can't move a model that has some modules offloaded to cpu or disk."
703
+ ]
704
+ }
705
+ ],
706
+ "source": [
707
+ "%%time\n",
708
+ "inference = TextGenerationInference() # Create an instance with default settings\n",
709
+ "\n",
710
+ "# Prompt 1\n",
711
+ "# prompt = \"\"\"\n",
712
+ "# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
713
+ "\n",
714
+ "# ### Instruction:\n",
715
+ "# Write the relevance for the given input description.\n",
716
+ "\n",
717
+ "# ### Input:\n",
718
+ "# Find out about the specified mapped drive\n",
719
+ "# \"\"\"\n",
720
+ "\n",
721
+ "# Prompt 2\n",
722
+ "# prompt = \"\"\"\n",
723
+ "# ### Instruction:\n",
724
+ "# Write the relevance for the given input description.\n",
725
+ "\n",
726
+ "# ### Input:\n",
727
+ "# Find out about the specified mapped drive.\n",
728
+ "# \"\"\"\n",
729
+ "\n",
730
+ "# Prompt 3\n",
731
+ "prompt = \"\"\"\n",
732
+ "<s>[INST] <<SYS>> Write the BigFixRelevance for the following description: Give the processor information <</SYS>> [/INST]\n",
733
+ "\"\"\"\n",
734
+ "\n",
735
+ "response = inference.chat(prompt)\n",
736
+ "print(response)"
737
+ ]
738
+ },
739
+ {
740
+ "cell_type": "code",
741
+ "execution_count": null,
742
+ "metadata": {},
743
+ "outputs": [],
744
+ "source": [
745
+ "relative_path = \"./data\"\n",
746
+ "absolute_path = os.path.abspath(relative_path)\n",
747
+ "print(absolute_path)"
748
+ ]
749
+ }
750
+ ],
751
+ "metadata": {
752
+ "accelerator": "GPU",
753
+ "colab": {
754
+ "gpuType": "T4",
755
+ "provenance": []
756
+ },
757
+ "kernelspec": {
758
+ "display_name": "Python (whatever you want to call it)",
759
+ "language": "python",
760
+ "name": "envname"
761
+ },
762
+ "language_info": {
763
+ "codemirror_mode": {
764
+ "name": "ipython",
765
+ "version": 3
766
+ },
767
+ "file_extension": ".py",
768
+ "mimetype": "text/x-python",
769
+ "name": "python",
770
+ "nbconvert_exporter": "python",
771
+ "pygments_lexer": "ipython3",
772
+ "version": "3.10.13"
773
+ },
774
+ "widgets": {
775
+ "application/vnd.jupyter.widget-state+json": {
776
+ "1a95698310fa41f89c221f7d775fadf7": {
777
+ "model_module": "@jupyter-widgets/base",
778
+ "model_module_version": "1.2.0",
779
+ "model_name": "LayoutModel",
780
+ "state": {
781
+ "_model_module": "@jupyter-widgets/base",
782
+ "_model_module_version": "1.2.0",
783
+ "_model_name": "LayoutModel",
784
+ "_view_count": null,
785
+ "_view_module": "@jupyter-widgets/base",
786
+ "_view_module_version": "1.2.0",
787
+ "_view_name": "LayoutView",
788
+ "align_content": null,
789
+ "align_items": null,
790
+ "align_self": null,
791
+ "border": null,
792
+ "bottom": null,
793
+ "display": null,
794
+ "flex": null,
795
+ "flex_flow": null,
796
+ "grid_area": null,
797
+ "grid_auto_columns": null,
798
+ "grid_auto_flow": null,
799
+ "grid_auto_rows": null,
800
+ "grid_column": null,
801
+ "grid_gap": null,
802
+ "grid_row": null,
803
+ "grid_template_areas": null,
804
+ "grid_template_columns": null,
805
+ "grid_template_rows": null,
806
+ "height": null,
807
+ "justify_content": null,
808
+ "justify_items": null,
809
+ "left": null,
810
+ "margin": null,
811
+ "max_height": null,
812
+ "max_width": null,
813
+ "min_height": null,
814
+ "min_width": null,
815
+ "object_fit": null,
816
+ "object_position": null,
817
+ "order": null,
818
+ "overflow": null,
819
+ "overflow_x": null,
820
+ "overflow_y": null,
821
+ "padding": null,
822
+ "right": null,
823
+ "top": null,
824
+ "visibility": null,
825
+ "width": null
826
+ }
827
+ },
828
+ "1c57bab4fd2e47f1b90a3d6758fa77cb": {
829
+ "model_module": "@jupyter-widgets/controls",
830
+ "model_module_version": "1.5.0",
831
+ "model_name": "FloatProgressModel",
832
+ "state": {
833
+ "_dom_classes": [],
834
+ "_model_module": "@jupyter-widgets/controls",
835
+ "_model_module_version": "1.5.0",
836
+ "_model_name": "FloatProgressModel",
837
+ "_view_count": null,
838
+ "_view_module": "@jupyter-widgets/controls",
839
+ "_view_module_version": "1.5.0",
840
+ "_view_name": "ProgressView",
841
+ "bar_style": "success",
842
+ "description": "",
843
+ "description_tooltip": null,
844
+ "layout": "IPY_MODEL_3b8f2fec5e3d4ab69855adf5cd7b0ffb",
845
+ "max": 10,
846
+ "min": 0,
847
+ "orientation": "horizontal",
848
+ "style": "IPY_MODEL_73a820a723814b8aba81c89bac03716e",
849
+ "value": 10
850
+ }
851
+ },
852
+ "2392776c72b94666abf1fd5f62549b87": {
853
+ "model_module": "@jupyter-widgets/controls",
854
+ "model_module_version": "1.5.0",
855
+ "model_name": "HTMLModel",
856
+ "state": {
857
+ "_dom_classes": [],
858
+ "_model_module": "@jupyter-widgets/controls",
859
+ "_model_module_version": "1.5.0",
860
+ "_model_name": "HTMLModel",
861
+ "_view_count": null,
862
+ "_view_module": "@jupyter-widgets/controls",
863
+ "_view_module_version": "1.5.0",
864
+ "_view_name": "HTMLView",
865
+ "description": "",
866
+ "description_tooltip": null,
867
+ "layout": "IPY_MODEL_b8e4bd68d27a4d6386814fede84da99e",
868
+ "placeholder": "​",
869
+ "style": "IPY_MODEL_94c0c32442fe47fb926be879602cdd1c",
870
+ "value": "Loading checkpoint shards: 100%"
871
+ }
872
+ },
873
+ "35c2527c83f94eb983b1ae730ed91747": {
874
+ "model_module": "@jupyter-widgets/controls",
875
+ "model_module_version": "1.5.0",
876
+ "model_name": "HBoxModel",
877
+ "state": {
878
+ "_dom_classes": [],
879
+ "_model_module": "@jupyter-widgets/controls",
880
+ "_model_module_version": "1.5.0",
881
+ "_model_name": "HBoxModel",
882
+ "_view_count": null,
883
+ "_view_module": "@jupyter-widgets/controls",
884
+ "_view_module_version": "1.5.0",
885
+ "_view_name": "HBoxView",
886
+ "box_style": "",
887
+ "children": [
888
+ "IPY_MODEL_2392776c72b94666abf1fd5f62549b87",
889
+ "IPY_MODEL_1c57bab4fd2e47f1b90a3d6758fa77cb",
890
+ "IPY_MODEL_ef48f3ab7d2c4cefbb4f6227d61d2424"
891
+ ],
892
+ "layout": "IPY_MODEL_9e13b105fc8a4207969a41055d0dae4e"
893
+ }
894
+ },
895
+ "3b8f2fec5e3d4ab69855adf5cd7b0ffb": {
896
+ "model_module": "@jupyter-widgets/base",
897
+ "model_module_version": "1.2.0",
898
+ "model_name": "LayoutModel",
899
+ "state": {
900
+ "_model_module": "@jupyter-widgets/base",
901
+ "_model_module_version": "1.2.0",
902
+ "_model_name": "LayoutModel",
903
+ "_view_count": null,
904
+ "_view_module": "@jupyter-widgets/base",
905
+ "_view_module_version": "1.2.0",
906
+ "_view_name": "LayoutView",
907
+ "align_content": null,
908
+ "align_items": null,
909
+ "align_self": null,
910
+ "border": null,
911
+ "bottom": null,
912
+ "display": null,
913
+ "flex": null,
914
+ "flex_flow": null,
915
+ "grid_area": null,
916
+ "grid_auto_columns": null,
917
+ "grid_auto_flow": null,
918
+ "grid_auto_rows": null,
919
+ "grid_column": null,
920
+ "grid_gap": null,
921
+ "grid_row": null,
922
+ "grid_template_areas": null,
923
+ "grid_template_columns": null,
924
+ "grid_template_rows": null,
925
+ "height": null,
926
+ "justify_content": null,
927
+ "justify_items": null,
928
+ "left": null,
929
+ "margin": null,
930
+ "max_height": null,
931
+ "max_width": null,
932
+ "min_height": null,
933
+ "min_width": null,
934
+ "object_fit": null,
935
+ "object_position": null,
936
+ "order": null,
937
+ "overflow": null,
938
+ "overflow_x": null,
939
+ "overflow_y": null,
940
+ "padding": null,
941
+ "right": null,
942
+ "top": null,
943
+ "visibility": null,
944
+ "width": null
945
+ }
946
+ },
947
+ "73a820a723814b8aba81c89bac03716e": {
948
+ "model_module": "@jupyter-widgets/controls",
949
+ "model_module_version": "1.5.0",
950
+ "model_name": "ProgressStyleModel",
951
+ "state": {
952
+ "_model_module": "@jupyter-widgets/controls",
953
+ "_model_module_version": "1.5.0",
954
+ "_model_name": "ProgressStyleModel",
955
+ "_view_count": null,
956
+ "_view_module": "@jupyter-widgets/base",
957
+ "_view_module_version": "1.2.0",
958
+ "_view_name": "StyleView",
959
+ "bar_color": null,
960
+ "description_width": ""
961
+ }
962
+ },
963
+ "8dc5f8affc6e43329e63112a4e552fff": {
964
+ "model_module": "@jupyter-widgets/controls",
965
+ "model_module_version": "1.5.0",
966
+ "model_name": "DescriptionStyleModel",
967
+ "state": {
968
+ "_model_module": "@jupyter-widgets/controls",
969
+ "_model_module_version": "1.5.0",
970
+ "_model_name": "DescriptionStyleModel",
971
+ "_view_count": null,
972
+ "_view_module": "@jupyter-widgets/base",
973
+ "_view_module_version": "1.2.0",
974
+ "_view_name": "StyleView",
975
+ "description_width": ""
976
+ }
977
+ },
978
+ "94c0c32442fe47fb926be879602cdd1c": {
979
+ "model_module": "@jupyter-widgets/controls",
980
+ "model_module_version": "1.5.0",
981
+ "model_name": "DescriptionStyleModel",
982
+ "state": {
983
+ "_model_module": "@jupyter-widgets/controls",
984
+ "_model_module_version": "1.5.0",
985
+ "_model_name": "DescriptionStyleModel",
986
+ "_view_count": null,
987
+ "_view_module": "@jupyter-widgets/base",
988
+ "_view_module_version": "1.2.0",
989
+ "_view_name": "StyleView",
990
+ "description_width": ""
991
+ }
992
+ },
993
+ "9e13b105fc8a4207969a41055d0dae4e": {
994
+ "model_module": "@jupyter-widgets/base",
995
+ "model_module_version": "1.2.0",
996
+ "model_name": "LayoutModel",
997
+ "state": {
998
+ "_model_module": "@jupyter-widgets/base",
999
+ "_model_module_version": "1.2.0",
1000
+ "_model_name": "LayoutModel",
1001
+ "_view_count": null,
1002
+ "_view_module": "@jupyter-widgets/base",
1003
+ "_view_module_version": "1.2.0",
1004
+ "_view_name": "LayoutView",
1005
+ "align_content": null,
1006
+ "align_items": null,
1007
+ "align_self": null,
1008
+ "border": null,
1009
+ "bottom": null,
1010
+ "display": null,
1011
+ "flex": null,
1012
+ "flex_flow": null,
1013
+ "grid_area": null,
1014
+ "grid_auto_columns": null,
1015
+ "grid_auto_flow": null,
1016
+ "grid_auto_rows": null,
1017
+ "grid_column": null,
1018
+ "grid_gap": null,
1019
+ "grid_row": null,
1020
+ "grid_template_areas": null,
1021
+ "grid_template_columns": null,
1022
+ "grid_template_rows": null,
1023
+ "height": null,
1024
+ "justify_content": null,
1025
+ "justify_items": null,
1026
+ "left": null,
1027
+ "margin": null,
1028
+ "max_height": null,
1029
+ "max_width": null,
1030
+ "min_height": null,
1031
+ "min_width": null,
1032
+ "object_fit": null,
1033
+ "object_position": null,
1034
+ "order": null,
1035
+ "overflow": null,
1036
+ "overflow_x": null,
1037
+ "overflow_y": null,
1038
+ "padding": null,
1039
+ "right": null,
1040
+ "top": null,
1041
+ "visibility": null,
1042
+ "width": null
1043
+ }
1044
+ },
1045
+ "b8e4bd68d27a4d6386814fede84da99e": {
1046
+ "model_module": "@jupyter-widgets/base",
1047
+ "model_module_version": "1.2.0",
1048
+ "model_name": "LayoutModel",
1049
+ "state": {
1050
+ "_model_module": "@jupyter-widgets/base",
1051
+ "_model_module_version": "1.2.0",
1052
+ "_model_name": "LayoutModel",
1053
+ "_view_count": null,
1054
+ "_view_module": "@jupyter-widgets/base",
1055
+ "_view_module_version": "1.2.0",
1056
+ "_view_name": "LayoutView",
1057
+ "align_content": null,
1058
+ "align_items": null,
1059
+ "align_self": null,
1060
+ "border": null,
1061
+ "bottom": null,
1062
+ "display": null,
1063
+ "flex": null,
1064
+ "flex_flow": null,
1065
+ "grid_area": null,
1066
+ "grid_auto_columns": null,
1067
+ "grid_auto_flow": null,
1068
+ "grid_auto_rows": null,
1069
+ "grid_column": null,
1070
+ "grid_gap": null,
1071
+ "grid_row": null,
1072
+ "grid_template_areas": null,
1073
+ "grid_template_columns": null,
1074
+ "grid_template_rows": null,
1075
+ "height": null,
1076
+ "justify_content": null,
1077
+ "justify_items": null,
1078
+ "left": null,
1079
+ "margin": null,
1080
+ "max_height": null,
1081
+ "max_width": null,
1082
+ "min_height": null,
1083
+ "min_width": null,
1084
+ "object_fit": null,
1085
+ "object_position": null,
1086
+ "order": null,
1087
+ "overflow": null,
1088
+ "overflow_x": null,
1089
+ "overflow_y": null,
1090
+ "padding": null,
1091
+ "right": null,
1092
+ "top": null,
1093
+ "visibility": null,
1094
+ "width": null
1095
+ }
1096
+ },
1097
+ "ef48f3ab7d2c4cefbb4f6227d61d2424": {
1098
+ "model_module": "@jupyter-widgets/controls",
1099
+ "model_module_version": "1.5.0",
1100
+ "model_name": "HTMLModel",
1101
+ "state": {
1102
+ "_dom_classes": [],
1103
+ "_model_module": "@jupyter-widgets/controls",
1104
+ "_model_module_version": "1.5.0",
1105
+ "_model_name": "HTMLModel",
1106
+ "_view_count": null,
1107
+ "_view_module": "@jupyter-widgets/controls",
1108
+ "_view_module_version": "1.5.0",
1109
+ "_view_name": "HTMLView",
1110
+ "description": "",
1111
+ "description_tooltip": null,
1112
+ "layout": "IPY_MODEL_1a95698310fa41f89c221f7d775fadf7",
1113
+ "placeholder": "​",
1114
+ "style": "IPY_MODEL_8dc5f8affc6e43329e63112a4e552fff",
1115
+ "value": " 10/10 [02:43&lt;00:00, 12.78s/it]"
1116
+ }
1117
+ }
1118
+ }
1119
+ }
1120
+ },
1121
+ "nbformat": 4,
1122
+ "nbformat_minor": 1
1123
+ }