hemantk089
committed on
Commit
•
b347b77
1
Parent(s):
964de79
Upload AutoTrain_LLM-meta-llama(Llama-2-7b-chat-hf)-sys_template.ipynb
Browse files
AutoTrain_LLM-meta-llama(Llama-2-7b-chat-hf)-sys_template.ipynb
ADDED
@@ -0,0 +1,1123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {
|
7 |
+
"colab": {
|
8 |
+
"base_uri": "https://localhost:8080/"
|
9 |
+
},
|
10 |
+
"id": "JvMRbVLEJlZT",
|
11 |
+
"outputId": "306df70b-0c75-4781-a75b-d10957a028a3"
|
12 |
+
},
|
13 |
+
"outputs": [],
|
14 |
+
"source": [
|
15 |
+
"# #@title 🤗 AutoTrain LLM\n",
|
16 |
+
"# #@markdown In order to use this colab\n",
|
17 |
+
"# #@markdown - upload train.csv to a folder named `data/`\n",
|
18 |
+
"# #@markdown - train.csv must contain a `text` column\n",
|
19 |
+
"# #@markdown - choose a project name if you wish\n",
|
20 |
+
"# #@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
|
21 |
+
"# #@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n",
|
22 |
+
"# #@markdown - update hyperparameters if you wish\n",
|
23 |
+
"# #@markdown - click `Runtime > Run all` or run each cell individually\n",
|
24 |
+
"\n",
|
25 |
+
"import os\n",
|
26 |
+
"# !pip install -U autotrain-advanced > install_logs.txt\n",
|
27 |
+
"# !autotrain setup > setup_logs.txt"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"cell_type": "code",
|
32 |
+
"execution_count": 2,
|
33 |
+
"metadata": {},
|
34 |
+
"outputs": [],
|
35 |
+
"source": [
|
36 |
+
"# pip install transformers --upgrad\n",
|
37 |
+
"# !pip install torch --upgrade\n",
|
38 |
+
"# !pip install tokenizers --upgrade"
|
39 |
+
]
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"cell_type": "code",
|
43 |
+
"execution_count": 3,
|
44 |
+
"metadata": {
|
45 |
+
"id": "A2-_lkBS1WKA"
|
46 |
+
},
|
47 |
+
"outputs": [],
|
48 |
+
"source": [
|
49 |
+
"#@markdown ---\n",
|
50 |
+
"#@markdown #### Project Config\n",
|
51 |
+
"#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n",
|
52 |
+
"project_name = 'my_autotrain_llm_sys_temp_meta_llama_chat' # @param {type:\"string\"}\n",
|
53 |
+
"model_name = \"meta-llama/Llama-2-7b-chat-hf\" # 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n",
|
54 |
+
"\n",
|
55 |
+
"#@markdown ---\n",
|
56 |
+
"#@markdown #### Push to Hub?\n",
|
57 |
+
"#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account\n",
|
58 |
+
"#@markdown If you dont use these, the model will be saved in Google Colab and you are required to download it manually.\n",
|
59 |
+
"#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n",
|
60 |
+
"#@markdown You can find your token here: https://huggingface.co/settings/tokens\n",
|
61 |
+
"push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
62 |
+
"hf_token = \"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\" #@param {type:\"string\"}\n",
|
63 |
+
"repo_id = \"hemantk089/llm_fine_tuning\" #@param {type:\"string\"}\n",
|
64 |
+
"\n",
|
65 |
+
"#@markdown ---\n",
|
66 |
+
"#@markdown #### Hyperparameters\n",
|
67 |
+
"learning_rate = 2e-4 # @param {type:\"number\"}\n",
|
68 |
+
"num_epochs = 200 #@param {type:\"number\"}\n",
|
69 |
+
"batch_size = 1 # @param {type:\"slider\", min:1, max:32, step:1}\n",
|
70 |
+
"block_size = 1024 # @param {type:\"number\"}\n",
|
71 |
+
"trainer = \"sft\" # @param [\"default\", \"sft\"] {type:\"raw\"}\n",
|
72 |
+
"warmup_ratio = 0.1 # @param {type:\"number\"}\n",
|
73 |
+
"weight_decay = 0.01 # @param {type:\"number\"}\n",
|
74 |
+
"gradient_accumulation = 4 # @param {type:\"number\"}\n",
|
75 |
+
"use_fp16 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
76 |
+
"use_peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
77 |
+
"use_int4 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
78 |
+
"lora_r = 16 #@param {type:\"number\"}\n",
|
79 |
+
"lora_alpha = 32 #@param {type:\"number\"}\n",
|
80 |
+
"lora_dropout = 0.05 #@param {type:\"number\"}\n",
|
81 |
+
"\n",
|
82 |
+
"os.environ[\"PROJECT_NAME\"] = project_name\n",
|
83 |
+
"os.environ[\"MODEL_NAME\"] = model_name\n",
|
84 |
+
"os.environ[\"PUSH_TO_HUB\"] = str(push_to_hub)\n",
|
85 |
+
"os.environ[\"HF_TOKEN\"] = hf_token\n",
|
86 |
+
"os.environ[\"REPO_ID\"] = repo_id\n",
|
87 |
+
"os.environ[\"LEARNING_RATE\"] = str(learning_rate)\n",
|
88 |
+
"os.environ[\"NUM_EPOCHS\"] = str(num_epochs)\n",
|
89 |
+
"os.environ[\"BATCH_SIZE\"] = str(batch_size)\n",
|
90 |
+
"os.environ[\"BLOCK_SIZE\"] = str(block_size)\n",
|
91 |
+
"os.environ[\"WARMUP_RATIO\"] = str(warmup_ratio)\n",
|
92 |
+
"os.environ[\"WEIGHT_DECAY\"] = str(weight_decay)\n",
|
93 |
+
"os.environ[\"GRADIENT_ACCUMULATION\"] = str(gradient_accumulation)\n",
|
94 |
+
"os.environ[\"USE_FP16\"] = str(use_fp16)\n",
|
95 |
+
"os.environ[\"USE_PEFT\"] = str(use_peft)\n",
|
96 |
+
"os.environ[\"USE_INT4\"] = str(use_int4)\n",
|
97 |
+
"os.environ[\"LORA_R\"] = str(lora_r)\n",
|
98 |
+
"os.environ[\"LORA_ALPHA\"] = str(lora_alpha)\n",
|
99 |
+
"os.environ[\"LORA_DROPOUT\"] = str(lora_dropout)\n"
|
100 |
+
]
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"cell_type": "code",
|
104 |
+
"execution_count": 4,
|
105 |
+
"metadata": {
|
106 |
+
"colab": {
|
107 |
+
"base_uri": "https://localhost:8080/"
|
108 |
+
},
|
109 |
+
"id": "g3cd_ED_yXXt",
|
110 |
+
"outputId": "d753c017-cf19-4822-b8ea-c9e6b70fc2d1"
|
111 |
+
},
|
112 |
+
"outputs": [
|
113 |
+
{
|
114 |
+
"name": "stdout",
|
115 |
+
"output_type": "stream",
|
116 |
+
"text": [
|
117 |
+
"[2023-10-04 14:41:59,153] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
|
118 |
+
"> \u001b[1mINFO Running LLM\u001b[0m\n",
|
119 |
+
"> \u001b[1mINFO Params: Namespace(add_eos_token=False, auto_find_batch_size=False, backend='default', block_size=1024, data_path='data/', deploy=False, evaluation_strategy='epoch', fp16=True, func=<function run_llm_command_factory at 0x1468523be1f0>, gradient_accumulation_steps=4, inference=False, learning_rate=0.0002, logging_steps=-1, lora_alpha=32, lora_dropout=0.05, lora_r=16, max_grad_norm=1.0, merge_adapter=False, model='meta-llama/Llama-2-7b-chat-hf', model_max_length=1024, num_train_epochs=200, optimizer='adamw_torch', project_name='my_autotrain_llm_sys_temp_meta_llama_chat', push_to_hub=False, repo_id=None, save_strategy='epoch', save_total_limit=1, scheduler='linear', seed=42, target_modules=None, text_column='text', token=None, train=True, train_batch_size=1, train_split='train', trainer='default', use_flash_attention_2=False, use_int4=True, use_int8=False, use_peft=True, username=None, valid_split=None, version=False, warmup_ratio=0.1, weight_decay=0.01)\u001b[0m\n",
|
120 |
+
"> \u001b[1mINFO loading dataset from csv\u001b[0m\n",
|
121 |
+
"Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.\n",
|
122 |
+
"Using pad_token, but it is not set yet.\n",
|
123 |
+
"Loading checkpoint shards: 100%|██████████████████| 2/2 [00:03<00:00, 1.78s/it]\n",
|
124 |
+
"You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32000. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc\n",
|
125 |
+
"Running tokenizer on train dataset: 100%|█| 90/90 [00:00<00:00, 8654.20 examples\n",
|
126 |
+
"Grouping texts in chunks of 1024 (num_proc=4): 100%|█| 90/90 [00:00<00:00, 729.7\n",
|
127 |
+
"> \u001b[1mINFO creating trainer\u001b[0m\n",
|
128 |
+
"{'loss': 2.5454, 'learning_rate': 1e-05, 'epoch': 1.0} \n",
|
129 |
+
"{'loss': 2.5454, 'learning_rate': 2e-05, 'epoch': 2.0} \n",
|
130 |
+
"{'loss': 2.5344, 'learning_rate': 3e-05, 'epoch': 3.0} \n",
|
131 |
+
"{'loss': 2.505, 'learning_rate': 4e-05, 'epoch': 4.0} \n",
|
132 |
+
"{'loss': 2.4569, 'learning_rate': 5e-05, 'epoch': 5.0} \n",
|
133 |
+
"{'loss': 2.3922, 'learning_rate': 6e-05, 'epoch': 6.0} \n",
|
134 |
+
"{'loss': 2.3117, 'learning_rate': 7e-05, 'epoch': 7.0} \n",
|
135 |
+
"{'loss': 2.2224, 'learning_rate': 8e-05, 'epoch': 8.0} \n",
|
136 |
+
"{'loss': 2.1331, 'learning_rate': 9e-05, 'epoch': 9.0} \n",
|
137 |
+
"{'loss': 2.0432, 'learning_rate': 0.0001, 'epoch': 10.0} \n",
|
138 |
+
"{'loss': 1.9529, 'learning_rate': 0.00011000000000000002, 'epoch': 11.0} \n",
|
139 |
+
"{'loss': 1.8601, 'learning_rate': 0.00012, 'epoch': 12.0} \n",
|
140 |
+
"{'loss': 1.7627, 'learning_rate': 0.00013000000000000002, 'epoch': 13.0} \n",
|
141 |
+
"{'loss': 1.6574, 'learning_rate': 0.00014, 'epoch': 14.0} \n",
|
142 |
+
"{'loss': 1.5425, 'learning_rate': 0.00015000000000000001, 'epoch': 15.0} \n",
|
143 |
+
"{'loss': 1.4224, 'learning_rate': 0.00016, 'epoch': 16.0} \n",
|
144 |
+
"{'loss': 1.3218, 'learning_rate': 0.00017, 'epoch': 17.0} \n",
|
145 |
+
"{'loss': 1.2318, 'learning_rate': 0.00018, 'epoch': 18.0} \n",
|
146 |
+
"{'loss': 1.1835, 'learning_rate': 0.00019, 'epoch': 19.0} \n",
|
147 |
+
"{'loss': 1.2394, 'learning_rate': 0.0002, 'epoch': 20.0} \n",
|
148 |
+
"{'loss': 1.2134, 'learning_rate': 0.0001988888888888889, 'epoch': 21.0} \n",
|
149 |
+
"{'loss': 1.1547, 'learning_rate': 0.00019777777777777778, 'epoch': 22.0} \n",
|
150 |
+
"{'loss': 1.1043, 'learning_rate': 0.00019666666666666666, 'epoch': 23.0} \n",
|
151 |
+
"{'loss': 1.0702, 'learning_rate': 0.00019555555555555556, 'epoch': 24.0} \n",
|
152 |
+
"{'loss': 1.0691, 'learning_rate': 0.00019444444444444446, 'epoch': 25.0} \n",
|
153 |
+
"{'loss': 1.043, 'learning_rate': 0.00019333333333333333, 'epoch': 26.0} \n",
|
154 |
+
"{'loss': 1.0063, 'learning_rate': 0.00019222222222222224, 'epoch': 27.0} \n",
|
155 |
+
"{'loss': 0.9849, 'learning_rate': 0.00019111111111111114, 'epoch': 28.0} \n",
|
156 |
+
"{'loss': 0.9743, 'learning_rate': 0.00019, 'epoch': 29.0} \n",
|
157 |
+
"{'loss': 0.9561, 'learning_rate': 0.00018888888888888888, 'epoch': 30.0} \n",
|
158 |
+
"{'loss': 0.933, 'learning_rate': 0.00018777777777777779, 'epoch': 31.0} \n",
|
159 |
+
"{'loss': 0.9131, 'learning_rate': 0.0001866666666666667, 'epoch': 32.0} \n",
|
160 |
+
"{'loss': 0.9, 'learning_rate': 0.00018555555555555556, 'epoch': 33.0} \n",
|
161 |
+
"{'loss': 0.8814, 'learning_rate': 0.00018444444444444446, 'epoch': 34.0} \n",
|
162 |
+
"{'loss': 0.8579, 'learning_rate': 0.00018333333333333334, 'epoch': 35.0} \n",
|
163 |
+
"{'loss': 0.836, 'learning_rate': 0.00018222222222222224, 'epoch': 36.0} \n",
|
164 |
+
"{'loss': 0.8163, 'learning_rate': 0.0001811111111111111, 'epoch': 37.0} \n",
|
165 |
+
"{'loss': 0.7901, 'learning_rate': 0.00018, 'epoch': 38.0} \n",
|
166 |
+
"{'loss': 0.7646, 'learning_rate': 0.0001788888888888889, 'epoch': 39.0} \n",
|
167 |
+
"{'loss': 0.743, 'learning_rate': 0.00017777777777777779, 'epoch': 40.0} \n",
|
168 |
+
"{'loss': 0.7167, 'learning_rate': 0.00017666666666666666, 'epoch': 41.0} \n",
|
169 |
+
"{'loss': 0.6906, 'learning_rate': 0.00017555555555555556, 'epoch': 42.0} \n",
|
170 |
+
"{'loss': 0.6648, 'learning_rate': 0.00017444444444444446, 'epoch': 43.0} \n",
|
171 |
+
"{'loss': 0.6364, 'learning_rate': 0.00017333333333333334, 'epoch': 44.0} \n",
|
172 |
+
"{'loss': 0.6111, 'learning_rate': 0.00017222222222222224, 'epoch': 45.0} \n",
|
173 |
+
"{'loss': 0.5821, 'learning_rate': 0.0001711111111111111, 'epoch': 46.0} \n",
|
174 |
+
"{'loss': 0.5554, 'learning_rate': 0.00017, 'epoch': 47.0} \n",
|
175 |
+
"{'loss': 0.5251, 'learning_rate': 0.00016888888888888889, 'epoch': 48.0} \n",
|
176 |
+
"{'loss': 0.4958, 'learning_rate': 0.0001677777777777778, 'epoch': 49.0} \n",
|
177 |
+
"{'loss': 0.4603, 'learning_rate': 0.0001666666666666667, 'epoch': 50.0} \n",
|
178 |
+
"{'loss': 0.4281, 'learning_rate': 0.00016555555555555556, 'epoch': 51.0} \n",
|
179 |
+
"{'loss': 0.3929, 'learning_rate': 0.00016444444444444444, 'epoch': 52.0} \n",
|
180 |
+
"{'loss': 0.3468, 'learning_rate': 0.00016333333333333334, 'epoch': 53.0} \n",
|
181 |
+
"{'loss': 0.3018, 'learning_rate': 0.00016222222222222224, 'epoch': 54.0} \n",
|
182 |
+
"{'loss': 0.2756, 'learning_rate': 0.00016222222222222224, 'epoch': 55.0} \n",
|
183 |
+
"{'loss': 0.2626, 'learning_rate': 0.0001611111111111111, 'epoch': 56.0} \n",
|
184 |
+
"{'loss': 0.2279, 'learning_rate': 0.00016, 'epoch': 57.0} \n",
|
185 |
+
"{'loss': 0.2086, 'learning_rate': 0.0001588888888888889, 'epoch': 58.0} \n",
|
186 |
+
"{'loss': 0.1972, 'learning_rate': 0.0001577777777777778, 'epoch': 59.0} \n",
|
187 |
+
"{'loss': 0.179, 'learning_rate': 0.00015666666666666666, 'epoch': 60.0} \n",
|
188 |
+
"{'loss': 0.156, 'learning_rate': 0.00015555555555555556, 'epoch': 61.0} \n",
|
189 |
+
"{'loss': 0.1361, 'learning_rate': 0.00015444444444444446, 'epoch': 62.0} \n",
|
190 |
+
"{'loss': 0.1177, 'learning_rate': 0.00015333333333333334, 'epoch': 63.0} \n",
|
191 |
+
"{'loss': 0.1009, 'learning_rate': 0.0001522222222222222, 'epoch': 64.0} \n",
|
192 |
+
"{'loss': 0.0834, 'learning_rate': 0.0001511111111111111, 'epoch': 65.0} \n",
|
193 |
+
"{'loss': 0.0682, 'learning_rate': 0.00015000000000000001, 'epoch': 66.0} \n",
|
194 |
+
"{'loss': 0.057, 'learning_rate': 0.0001488888888888889, 'epoch': 67.0} \n",
|
195 |
+
"{'loss': 0.0469, 'learning_rate': 0.0001477777777777778, 'epoch': 68.0} \n",
|
196 |
+
"{'loss': 0.0382, 'learning_rate': 0.00014666666666666666, 'epoch': 69.0} \n",
|
197 |
+
"{'loss': 0.0327, 'learning_rate': 0.00014555555555555556, 'epoch': 70.0} \n",
|
198 |
+
"{'loss': 0.0272, 'learning_rate': 0.00014444444444444444, 'epoch': 71.0} \n",
|
199 |
+
"{'loss': 0.0222, 'learning_rate': 0.00014333333333333334, 'epoch': 72.0} \n",
|
200 |
+
"{'loss': 0.0191, 'learning_rate': 0.00014222222222222224, 'epoch': 73.0} \n",
|
201 |
+
"{'loss': 0.0165, 'learning_rate': 0.00014111111111111111, 'epoch': 74.0} \n",
|
202 |
+
"{'loss': 0.0145, 'learning_rate': 0.00014, 'epoch': 75.0} \n",
|
203 |
+
"{'loss': 0.0133, 'learning_rate': 0.0001388888888888889, 'epoch': 76.0} \n",
|
204 |
+
"{'loss': 0.0122, 'learning_rate': 0.0001377777777777778, 'epoch': 77.0} \n",
|
205 |
+
"{'loss': 0.011, 'learning_rate': 0.00013666666666666666, 'epoch': 78.0} \n",
|
206 |
+
"{'loss': 0.0104, 'learning_rate': 0.00013555555555555556, 'epoch': 79.0} \n",
|
207 |
+
"{'loss': 0.0099, 'learning_rate': 0.00013444444444444447, 'epoch': 80.0} \n",
|
208 |
+
"{'loss': 0.0094, 'learning_rate': 0.00013333333333333334, 'epoch': 81.0} \n",
|
209 |
+
"{'loss': 0.009, 'learning_rate': 0.00013222222222222221, 'epoch': 82.0} \n",
|
210 |
+
"{'loss': 0.0088, 'learning_rate': 0.00013111111111111111, 'epoch': 83.0} \n",
|
211 |
+
"{'loss': 0.0087, 'learning_rate': 0.00013000000000000002, 'epoch': 84.0} \n",
|
212 |
+
"{'loss': 0.0083, 'learning_rate': 0.00012888888888888892, 'epoch': 85.0} \n",
|
213 |
+
"{'loss': 0.0083, 'learning_rate': 0.00012777777777777776, 'epoch': 86.0} \n",
|
214 |
+
"{'loss': 0.0077, 'learning_rate': 0.00012666666666666666, 'epoch': 87.0} \n",
|
215 |
+
"{'loss': 0.0077, 'learning_rate': 0.00012555555555555557, 'epoch': 88.0} \n",
|
216 |
+
"{'loss': 0.0075, 'learning_rate': 0.00012444444444444444, 'epoch': 89.0} \n",
|
217 |
+
"{'loss': 0.0074, 'learning_rate': 0.00012333333333333334, 'epoch': 90.0} \n",
|
218 |
+
"{'loss': 0.0072, 'learning_rate': 0.00012222222222222224, 'epoch': 91.0} \n",
|
219 |
+
"{'loss': 0.0073, 'learning_rate': 0.0001211111111111111, 'epoch': 92.0} \n",
|
220 |
+
"{'loss': 0.0068, 'learning_rate': 0.00012, 'epoch': 93.0} \n",
|
221 |
+
"{'loss': 0.0068, 'learning_rate': 0.00011888888888888889, 'epoch': 94.0} \n",
|
222 |
+
"{'loss': 0.0067, 'learning_rate': 0.00011777777777777779, 'epoch': 95.0} \n",
|
223 |
+
"{'loss': 0.0066, 'learning_rate': 0.00011666666666666668, 'epoch': 96.0} \n",
|
224 |
+
"{'loss': 0.0065, 'learning_rate': 0.00011555555555555555, 'epoch': 97.0} \n",
|
225 |
+
"{'loss': 0.0065, 'learning_rate': 0.00011444444444444444, 'epoch': 98.0} \n",
|
226 |
+
"{'loss': 0.0062, 'learning_rate': 0.00011333333333333334, 'epoch': 99.0} \n",
|
227 |
+
"{'loss': 0.0062, 'learning_rate': 0.00011222222222222223, 'epoch': 100.0} \n",
|
228 |
+
"{'loss': 0.0064, 'learning_rate': 0.00011111111111111112, 'epoch': 101.0} \n",
|
229 |
+
"{'loss': 0.006, 'learning_rate': 0.00011000000000000002, 'epoch': 102.0} \n",
|
230 |
+
"{'loss': 0.0061, 'learning_rate': 0.00010888888888888889, 'epoch': 103.0} \n",
|
231 |
+
"{'loss': 0.0061, 'learning_rate': 0.00010777777777777778, 'epoch': 104.0} \n",
|
232 |
+
"{'loss': 0.0061, 'learning_rate': 0.00010666666666666667, 'epoch': 105.0} \n",
|
233 |
+
"{'loss': 0.0062, 'learning_rate': 0.00010555555555555557, 'epoch': 106.0} \n",
|
234 |
+
"{'loss': 0.006, 'learning_rate': 0.00010444444444444445, 'epoch': 107.0} \n",
|
235 |
+
"{'loss': 0.0061, 'learning_rate': 0.00010333333333333334, 'epoch': 108.0} \n",
|
236 |
+
"{'loss': 0.0059, 'learning_rate': 0.00010222222222222222, 'epoch': 109.0} \n",
|
237 |
+
"{'loss': 0.0059, 'learning_rate': 0.00010111111111111112, 'epoch': 110.0} \n",
|
238 |
+
"{'loss': 0.0059, 'learning_rate': 0.0001, 'epoch': 111.0} \n",
|
239 |
+
"{'loss': 0.0058, 'learning_rate': 9.888888888888889e-05, 'epoch': 112.0} \n",
|
240 |
+
"{'loss': 0.0058, 'learning_rate': 9.777777777777778e-05, 'epoch': 113.0} \n",
|
241 |
+
"{'loss': 0.0057, 'learning_rate': 9.666666666666667e-05, 'epoch': 114.0} \n",
|
242 |
+
"{'loss': 0.0057, 'learning_rate': 9.555555555555557e-05, 'epoch': 115.0} \n",
|
243 |
+
"{'loss': 0.0057, 'learning_rate': 9.444444444444444e-05, 'epoch': 116.0} \n",
|
244 |
+
"{'loss': 0.0056, 'learning_rate': 9.333333333333334e-05, 'epoch': 117.0} \n",
|
245 |
+
"{'loss': 0.0057, 'learning_rate': 9.222222222222223e-05, 'epoch': 118.0} \n",
|
246 |
+
"{'loss': 0.0057, 'learning_rate': 9.111111111111112e-05, 'epoch': 119.0} \n",
|
247 |
+
"{'loss': 0.0057, 'learning_rate': 9e-05, 'epoch': 120.0} \n",
|
248 |
+
"{'loss': 0.0057, 'learning_rate': 8.888888888888889e-05, 'epoch': 121.0} \n",
|
249 |
+
"{'loss': 0.0056, 'learning_rate': 8.777777777777778e-05, 'epoch': 122.0} \n",
|
250 |
+
"{'loss': 0.0056, 'learning_rate': 8.666666666666667e-05, 'epoch': 123.0} \n",
|
251 |
+
"{'loss': 0.0056, 'learning_rate': 8.555555555555556e-05, 'epoch': 124.0} \n",
|
252 |
+
"{'loss': 0.0056, 'learning_rate': 8.444444444444444e-05, 'epoch': 125.0} \n",
|
253 |
+
"{'loss': 0.0056, 'learning_rate': 8.333333333333334e-05, 'epoch': 126.0} \n",
|
254 |
+
"{'loss': 0.0056, 'learning_rate': 8.222222222222222e-05, 'epoch': 127.0} \n",
|
255 |
+
"{'loss': 0.0056, 'learning_rate': 8.111111111111112e-05, 'epoch': 128.0} \n",
|
256 |
+
"{'loss': 0.0054, 'learning_rate': 8e-05, 'epoch': 129.0} \n",
|
257 |
+
"{'loss': 0.0055, 'learning_rate': 7.88888888888889e-05, 'epoch': 130.0} \n",
|
258 |
+
"{'loss': 0.0055, 'learning_rate': 7.777777777777778e-05, 'epoch': 131.0} \n",
|
259 |
+
"{'loss': 0.0055, 'learning_rate': 7.666666666666667e-05, 'epoch': 132.0} \n",
|
260 |
+
"{'loss': 0.0054, 'learning_rate': 7.555555555555556e-05, 'epoch': 133.0} \n",
|
261 |
+
"{'loss': 0.0055, 'learning_rate': 7.444444444444444e-05, 'epoch': 134.0} \n",
|
262 |
+
"{'loss': 0.0055, 'learning_rate': 7.333333333333333e-05, 'epoch': 135.0} \n",
|
263 |
+
"{'loss': 0.0055, 'learning_rate': 7.222222222222222e-05, 'epoch': 136.0} \n",
|
264 |
+
"{'loss': 0.0054, 'learning_rate': 7.111111111111112e-05, 'epoch': 137.0} \n",
|
265 |
+
"{'loss': 0.0054, 'learning_rate': 7e-05, 'epoch': 138.0} \n",
|
266 |
+
"{'loss': 0.0053, 'learning_rate': 6.88888888888889e-05, 'epoch': 139.0} \n",
|
267 |
+
"{'loss': 0.0053, 'learning_rate': 6.777777777777778e-05, 'epoch': 140.0} \n",
|
268 |
+
"{'loss': 0.0055, 'learning_rate': 6.666666666666667e-05, 'epoch': 141.0} \n",
|
269 |
+
"{'loss': 0.0053, 'learning_rate': 6.555555555555556e-05, 'epoch': 142.0} \n",
|
270 |
+
"{'loss': 0.0054, 'learning_rate': 6.444444444444446e-05, 'epoch': 143.0} \n",
|
271 |
+
"{'loss': 0.0054, 'learning_rate': 6.333333333333333e-05, 'epoch': 144.0} \n",
|
272 |
+
"{'loss': 0.0052, 'learning_rate': 6.222222222222222e-05, 'epoch': 145.0} \n",
|
273 |
+
"{'loss': 0.0053, 'learning_rate': 6.111111111111112e-05, 'epoch': 146.0} \n",
|
274 |
+
"{'loss': 0.0053, 'learning_rate': 6e-05, 'epoch': 147.0} \n",
|
275 |
+
"{'loss': 0.0052, 'learning_rate': 5.8888888888888896e-05, 'epoch': 148.0} \n",
|
276 |
+
"{'loss': 0.0051, 'learning_rate': 5.7777777777777776e-05, 'epoch': 149.0} \n",
|
277 |
+
"{'loss': 0.0053, 'learning_rate': 5.666666666666667e-05, 'epoch': 150.0} \n",
|
278 |
+
"{'loss': 0.0052, 'learning_rate': 5.555555555555556e-05, 'epoch': 151.0} \n",
|
279 |
+
"{'loss': 0.0053, 'learning_rate': 5.4444444444444446e-05, 'epoch': 152.0} \n",
|
280 |
+
"{'loss': 0.0053, 'learning_rate': 5.333333333333333e-05, 'epoch': 153.0} \n",
|
281 |
+
"{'loss': 0.0052, 'learning_rate': 5.222222222222223e-05, 'epoch': 154.0} \n",
|
282 |
+
"{'loss': 0.0051, 'learning_rate': 5.111111111111111e-05, 'epoch': 155.0} \n",
|
283 |
+
"{'loss': 0.0051, 'learning_rate': 5e-05, 'epoch': 156.0} \n",
|
284 |
+
"{'loss': 0.0053, 'learning_rate': 4.888888888888889e-05, 'epoch': 157.0} \n",
|
285 |
+
"{'loss': 0.0053, 'learning_rate': 4.7777777777777784e-05, 'epoch': 158.0} \n",
|
286 |
+
"{'loss': 0.0052, 'learning_rate': 4.666666666666667e-05, 'epoch': 159.0} \n",
|
287 |
+
"{'loss': 0.0051, 'learning_rate': 4.555555555555556e-05, 'epoch': 160.0} \n",
|
288 |
+
"{'loss': 0.0053, 'learning_rate': 4.4444444444444447e-05, 'epoch': 161.0} \n",
|
289 |
+
"{'loss': 0.0052, 'learning_rate': 4.3333333333333334e-05, 'epoch': 162.0} \n",
|
290 |
+
"{'loss': 0.0051, 'learning_rate': 4.222222222222222e-05, 'epoch': 163.0} \n",
|
291 |
+
"{'loss': 0.0052, 'learning_rate': 4.111111111111111e-05, 'epoch': 164.0} \n",
|
292 |
+
"{'loss': 0.0051, 'learning_rate': 4e-05, 'epoch': 165.0} \n",
|
293 |
+
"{'loss': 0.0053, 'learning_rate': 3.888888888888889e-05, 'epoch': 166.0} \n",
|
294 |
+
"{'loss': 0.0051, 'learning_rate': 3.777777777777778e-05, 'epoch': 167.0} \n",
|
295 |
+
"{'loss': 0.0051, 'learning_rate': 3.6666666666666666e-05, 'epoch': 168.0} \n",
|
296 |
+
"{'loss': 0.0051, 'learning_rate': 3.555555555555556e-05, 'epoch': 169.0} \n",
|
297 |
+
"{'loss': 0.005, 'learning_rate': 3.444444444444445e-05, 'epoch': 170.0} \n",
|
298 |
+
"{'loss': 0.005, 'learning_rate': 3.3333333333333335e-05, 'epoch': 171.0} \n",
|
299 |
+
"{'loss': 0.0051, 'learning_rate': 3.222222222222223e-05, 'epoch': 172.0} \n",
|
300 |
+
"{'loss': 0.0051, 'learning_rate': 3.111111111111111e-05, 'epoch': 173.0} \n"
|
301 |
+
]
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"name": "stdout",
|
305 |
+
"output_type": "stream",
|
306 |
+
"text": [
|
307 |
+
"{'loss': 0.0051, 'learning_rate': 3e-05, 'epoch': 174.0} \n",
|
308 |
+
"{'loss': 0.0051, 'learning_rate': 2.8888888888888888e-05, 'epoch': 175.0} \n",
|
309 |
+
"{'loss': 0.005, 'learning_rate': 2.777777777777778e-05, 'epoch': 176.0} \n",
|
310 |
+
"{'loss': 0.0052, 'learning_rate': 2.6666666666666667e-05, 'epoch': 177.0} \n",
|
311 |
+
"{'loss': 0.005, 'learning_rate': 2.5555555555555554e-05, 'epoch': 178.0} \n",
|
312 |
+
"{'loss': 0.005, 'learning_rate': 2.4444444444444445e-05, 'epoch': 179.0} \n",
|
313 |
+
"{'loss': 0.005, 'learning_rate': 2.3333333333333336e-05, 'epoch': 180.0} \n",
|
314 |
+
"{'loss': 0.005, 'learning_rate': 2.2222222222222223e-05, 'epoch': 181.0} \n",
|
315 |
+
"{'loss': 0.005, 'learning_rate': 2.111111111111111e-05, 'epoch': 182.0} \n",
|
316 |
+
"{'loss': 0.005, 'learning_rate': 2e-05, 'epoch': 183.0} \n",
|
317 |
+
"{'loss': 0.0049, 'learning_rate': 1.888888888888889e-05, 'epoch': 184.0} \n",
|
318 |
+
"{'loss': 0.0049, 'learning_rate': 1.777777777777778e-05, 'epoch': 185.0} \n",
|
319 |
+
"{'loss': 0.005, 'learning_rate': 1.6666666666666667e-05, 'epoch': 186.0} \n",
|
320 |
+
"{'loss': 0.005, 'learning_rate': 1.5555555555555555e-05, 'epoch': 187.0} \n",
|
321 |
+
"{'loss': 0.0049, 'learning_rate': 1.4444444444444444e-05, 'epoch': 188.0} \n",
|
322 |
+
"{'loss': 0.005, 'learning_rate': 1.3333333333333333e-05, 'epoch': 189.0} \n",
|
323 |
+
"{'loss': 0.0049, 'learning_rate': 1.2222222222222222e-05, 'epoch': 190.0} \n",
|
324 |
+
"{'loss': 0.0049, 'learning_rate': 1.1111111111111112e-05, 'epoch': 191.0} \n",
|
325 |
+
"{'loss': 0.0049, 'learning_rate': 1e-05, 'epoch': 192.0} \n",
|
326 |
+
"{'loss': 0.0049, 'learning_rate': 8.88888888888889e-06, 'epoch': 193.0} \n",
|
327 |
+
"{'loss': 0.0048, 'learning_rate': 7.777777777777777e-06, 'epoch': 194.0} \n",
|
328 |
+
"{'loss': 0.0049, 'learning_rate': 6.666666666666667e-06, 'epoch': 195.0} \n",
|
329 |
+
"{'loss': 0.005, 'learning_rate': 5.555555555555556e-06, 'epoch': 196.0} \n",
|
330 |
+
"{'loss': 0.005, 'learning_rate': 4.444444444444445e-06, 'epoch': 197.0} \n",
|
331 |
+
"{'loss': 0.0049, 'learning_rate': 3.3333333333333333e-06, 'epoch': 198.0} \n",
|
332 |
+
"{'loss': 0.0049, 'learning_rate': 2.2222222222222225e-06, 'epoch': 199.0} \n",
|
333 |
+
"{'loss': 0.0048, 'learning_rate': 1.1111111111111112e-06, 'epoch': 200.0} \n",
|
334 |
+
"{'train_runtime': 2631.4323, 'train_samples_per_second': 0.304, 'train_steps_per_second': 0.076, 'train_loss': 0.34155846770387144, 'epoch': 200.0}\n",
|
335 |
+
"100%|█████████████████████████████████████████| 200/200 [43:51<00:00, 13.16s/it]\n",
|
336 |
+
"> \u001b[1mINFO Finished training, saving model...\u001b[0m\n",
|
337 |
+
"CPU times: user 12.4 s, sys: 4.81 s, total: 17.3 s\n",
|
338 |
+
"Wall time: 44min 7s\n"
|
339 |
+
]
|
340 |
+
}
|
341 |
+
],
|
342 |
+
"source": [
|
343 |
+
"%%time\n",
|
344 |
+
"!autotrain llm \\\n",
|
345 |
+
"--train \\\n",
|
346 |
+
"--model ${MODEL_NAME} \\\n",
|
347 |
+
"--project-name ${PROJECT_NAME} \\\n",
|
348 |
+
"--data-path data/ \\\n",
|
349 |
+
"--text-column text \\\n",
|
350 |
+
"--lr ${LEARNING_RATE} \\\n",
|
351 |
+
"--batch-size ${BATCH_SIZE} \\\n",
|
352 |
+
"--epochs ${NUM_EPOCHS} \\\n",
|
353 |
+
"--block-size ${BLOCK_SIZE} \\\n",
|
354 |
+
"--warmup-ratio ${WARMUP_RATIO} \\\n",
|
355 |
+
"--lora-r ${LORA_R} \\\n",
|
356 |
+
"--lora-alpha ${LORA_ALPHA} \\\n",
|
357 |
+
"--lora-dropout ${LORA_DROPOUT} \\\n",
|
358 |
+
"--weight-decay ${WEIGHT_DECAY} \\\n",
|
359 |
+
"--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n",
|
360 |
+
"$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
|
361 |
+
"$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
|
362 |
+
"$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
|
363 |
+
"$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
|
364 |
+
]
|
365 |
+
},
|
366 |
+
{
|
367 |
+
"cell_type": "code",
|
368 |
+
"execution_count": 5,
|
369 |
+
"metadata": {
|
370 |
+
"id": "gdGQQoED1WSd"
|
371 |
+
},
|
372 |
+
"outputs": [],
|
373 |
+
"source": [
|
374 |
+
"# !mkdir data"
|
375 |
+
]
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"cell_type": "code",
|
379 |
+
"execution_count": 6,
|
380 |
+
"metadata": {
|
381 |
+
"id": "J3_aYwtv5LtN"
|
382 |
+
},
|
383 |
+
"outputs": [],
|
384 |
+
"source": [
|
385 |
+
"# cd data"
|
386 |
+
]
|
387 |
+
},
|
388 |
+
{
|
389 |
+
"cell_type": "code",
|
390 |
+
"execution_count": 7,
|
391 |
+
"metadata": {
|
392 |
+
"id": "FtoUbYWR5RSD"
|
393 |
+
},
|
394 |
+
"outputs": [],
|
395 |
+
"source": [
|
396 |
+
"import pandas as pd\n",
|
397 |
+
"df = pd.read_csv('./data/train.csv')"
|
398 |
+
]
|
399 |
+
},
|
400 |
+
{
|
401 |
+
"cell_type": "code",
|
402 |
+
"execution_count": 8,
|
403 |
+
"metadata": {
|
404 |
+
"colab": {
|
405 |
+
"base_uri": "https://localhost:8080/",
|
406 |
+
"height": 536
|
407 |
+
},
|
408 |
+
"id": "Vg9SeMOf8Zh5",
|
409 |
+
"outputId": "4d86182d-dd4e-4397-cdf9-b86c3816cd4e"
|
410 |
+
},
|
411 |
+
"outputs": [
|
412 |
+
{
|
413 |
+
"name": "stdout",
|
414 |
+
"output_type": "stream",
|
415 |
+
"text": [
|
416 |
+
"(90, 3)\n"
|
417 |
+
]
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"data": {
|
421 |
+
"text/html": [
|
422 |
+
"<div>\n",
|
423 |
+
"<style scoped>\n",
|
424 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
425 |
+
" vertical-align: middle;\n",
|
426 |
+
" }\n",
|
427 |
+
"\n",
|
428 |
+
" .dataframe tbody tr th {\n",
|
429 |
+
" vertical-align: top;\n",
|
430 |
+
" }\n",
|
431 |
+
"\n",
|
432 |
+
" .dataframe thead th {\n",
|
433 |
+
" text-align: right;\n",
|
434 |
+
" }\n",
|
435 |
+
"</style>\n",
|
436 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
437 |
+
" <thead>\n",
|
438 |
+
" <tr style=\"text-align: right;\">\n",
|
439 |
+
" <th></th>\n",
|
440 |
+
" <th>Description</th>\n",
|
441 |
+
" <th>Relevances</th>\n",
|
442 |
+
" <th>text</th>\n",
|
443 |
+
" </tr>\n",
|
444 |
+
" </thead>\n",
|
445 |
+
" <tbody>\n",
|
446 |
+
" <tr>\n",
|
447 |
+
" <th>0</th>\n",
|
448 |
+
" <td>Give the processor information</td>\n",
|
449 |
+
" <td>vendor names of processors</td>\n",
|
450 |
+
" <td><s>[INST] <<SYS>> Write the BigFixRelevance fo...</td>\n",
|
451 |
+
" </tr>\n",
|
452 |
+
" <tr>\n",
|
453 |
+
" <th>1</th>\n",
|
454 |
+
" <td>find the speed of processor</td>\n",
|
455 |
+
" <td>speeds of processors</td>\n",
|
456 |
+
" <td><s>[INST] <<SYS>> Write the BigFixRelevance fo...</td>\n",
|
457 |
+
" </tr>\n",
|
458 |
+
" <tr>\n",
|
459 |
+
" <th>2</th>\n",
|
460 |
+
" <td>find unique processor names</td>\n",
|
461 |
+
" <td>unique values of vendor names of processors</td>\n",
|
462 |
+
" <td><s>[INST] <<SYS>> Write the BigFixRelevance fo...</td>\n",
|
463 |
+
" </tr>\n",
|
464 |
+
" <tr>\n",
|
465 |
+
" <th>3</th>\n",
|
466 |
+
" <td>find name of operating system</td>\n",
|
467 |
+
" <td>name of operating system</td>\n",
|
468 |
+
" <td><s>[INST] <<SYS>> Write the BigFixRelevance fo...</td>\n",
|
469 |
+
" </tr>\n",
|
470 |
+
" <tr>\n",
|
471 |
+
" <th>4</th>\n",
|
472 |
+
" <td>find family names of processor</td>\n",
|
473 |
+
" <td>family names of processors</td>\n",
|
474 |
+
" <td><s>[INST] <<SYS>> Write the BigFixRelevance fo...</td>\n",
|
475 |
+
" </tr>\n",
|
476 |
+
" </tbody>\n",
|
477 |
+
"</table>\n",
|
478 |
+
"</div>"
|
479 |
+
],
|
480 |
+
"text/plain": [
|
481 |
+
" Description \\\n",
|
482 |
+
"0 Give the processor information \n",
|
483 |
+
"1 find the speed of processor \n",
|
484 |
+
"2 find unique processor names \n",
|
485 |
+
"3 find name of operating system \n",
|
486 |
+
"4 find family names of processor \n",
|
487 |
+
"\n",
|
488 |
+
" Relevances \\\n",
|
489 |
+
"0 vendor names of processors \n",
|
490 |
+
"1 speeds of processors \n",
|
491 |
+
"2 unique values of vendor names of processors \n",
|
492 |
+
"3 name of operating system \n",
|
493 |
+
"4 family names of processors \n",
|
494 |
+
"\n",
|
495 |
+
" text \n",
|
496 |
+
"0 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
|
497 |
+
"1 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
|
498 |
+
"2 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
|
499 |
+
"3 <s>[INST] <<SYS>> Write the BigFixRelevance fo... \n",
|
500 |
+
"4 <s>[INST] <<SYS>> Write the BigFixRelevance fo... "
|
501 |
+
]
|
502 |
+
},
|
503 |
+
"execution_count": 8,
|
504 |
+
"metadata": {},
|
505 |
+
"output_type": "execute_result"
|
506 |
+
}
|
507 |
+
],
|
508 |
+
"source": [
|
509 |
+
"print(df.shape)\n",
|
510 |
+
"df.head(5)"
|
511 |
+
]
|
512 |
+
},
|
513 |
+
{
|
514 |
+
"cell_type": "code",
|
515 |
+
"execution_count": 14,
|
516 |
+
"metadata": {
|
517 |
+
"id": "ZxC7Rw7TFTPY"
|
518 |
+
},
|
519 |
+
"outputs": [],
|
520 |
+
"source": [
|
521 |
+
"from dataclasses import dataclass\n",
|
522 |
+
"from typing import Optional\n",
|
523 |
+
"\n",
|
524 |
+
"import torch\n",
|
525 |
+
"from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig\n",
|
526 |
+
"\n",
|
527 |
+
"\n",
|
528 |
+
"@dataclass\n",
|
529 |
+
"class TextGenerationInference:\n",
|
530 |
+
" model_path: str = \"my_autotrain_llm_sys_temp_meta_llama_chat\"\n",
|
531 |
+
" use_int4: Optional[bool] = False\n",
|
532 |
+
" use_int8: Optional[bool] = False\n",
|
533 |
+
" temperature: Optional[float] = 0.6\n",
|
534 |
+
" top_k: Optional[int] = 50\n",
|
535 |
+
" top_p: Optional[float] = 0.95\n",
|
536 |
+
" repetition_penalty: Optional[float] = 1.0\n",
|
537 |
+
" num_return_sequences: Optional[int] = 1\n",
|
538 |
+
" num_beams: Optional[int] = 5\n",
|
539 |
+
" max_new_tokens: Optional[int] = 1024\n",
|
540 |
+
" do_sample: Optional[bool] = True\n",
|
541 |
+
"\n",
|
542 |
+
" def __post_init__(self):\n",
|
543 |
+
" self.model = AutoModelForCausalLM.from_pretrained(\n",
|
544 |
+
" self.model_path,\n",
|
545 |
+
" load_in_4bit=self.use_int4,\n",
|
546 |
+
" load_in_8bit=self.use_int8,\n",
|
547 |
+
" torch_dtype=torch.float16,\n",
|
548 |
+
" trust_remote_code=True,\n",
|
549 |
+
" device_map=\"auto\",\n",
|
550 |
+
" offload_folder=\"/azusers/work/Hemant/data\" # \"./data\"\n",
|
551 |
+
" )\n",
|
552 |
+
" self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)\n",
|
553 |
+
" self.model.eval()\n",
|
554 |
+
" self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
|
555 |
+
" self.generation_config = GenerationConfig(\n",
|
556 |
+
" temperature=self.temperature,\n",
|
557 |
+
" top_k=self.top_k,\n",
|
558 |
+
" top_p=self.top_p,\n",
|
559 |
+
" repetition_penalty=self.repetition_penalty,\n",
|
560 |
+
" num_return_sequences=self.num_return_sequences,\n",
|
561 |
+
" num_beams=self.num_beams,\n",
|
562 |
+
" max_length=self.max_new_tokens,\n",
|
563 |
+
" eos_token_id=self.tokenizer.eos_token_id,\n",
|
564 |
+
" do_sample=self.do_sample,\n",
|
565 |
+
" max_new_tokens=self.max_new_tokens,\n",
|
566 |
+
" )\n",
|
567 |
+
"\n",
|
568 |
+
"# def chat(self, prompt):\n",
|
569 |
+
"# inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n",
|
570 |
+
"# outputs = self.model.generate(**inputs, generation_config=self.generation_config)\n",
|
571 |
+
"# return self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
572 |
+
" \n",
|
573 |
+
"# def chat(self, prompt):\n",
|
574 |
+
"# inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n",
|
575 |
+
"\n",
|
576 |
+
"# # Ensure the generation config uses beam search and returns 5 sequences\n",
|
577 |
+
"# self.generation_config['num_beams'] = 5\n",
|
578 |
+
"# self.generation_config['num_return_sequences'] = 5\n",
|
579 |
+
"\n",
|
580 |
+
"# outputs = self.model.generate(**inputs, **self.generation_config)\n",
|
581 |
+
"\n",
|
582 |
+
"# # Decode each of the returned sequences\n",
|
583 |
+
"# responses = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]\n",
|
584 |
+
"# return responses\n",
|
585 |
+
" \n",
|
586 |
+
" def chat(self, prompt):\n",
|
587 |
+
" inputs = self.tokenizer([prompt], return_tensors=\"pt\").to(self.device)\n",
|
588 |
+
"\n",
|
589 |
+
" # Ensure the generation config uses beam search and returns 5 sequences\n",
|
590 |
+
" setattr(self.generation_config, 'num_beams', 5)\n",
|
591 |
+
" setattr(self.generation_config, 'num_return_sequences', 5)\n",
|
592 |
+
"\n",
|
593 |
+
" outputs = self.model.generate(\n",
|
594 |
+
" **inputs, \n",
|
595 |
+
" num_beams=self.generation_config.num_beams, \n",
|
596 |
+
" num_return_sequences=self.generation_config.num_return_sequences\n",
|
597 |
+
" )\n",
|
598 |
+
"\n",
|
599 |
+
" # Decode each of the returned sequences\n",
|
600 |
+
" responses = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]\n",
|
601 |
+
" return responses\n",
|
602 |
+
"\n",
|
603 |
+
"\n",
|
604 |
+
" "
|
605 |
+
]
|
606 |
+
},
|
607 |
+
{
|
608 |
+
"cell_type": "code",
|
609 |
+
"execution_count": null,
|
610 |
+
"metadata": {
|
611 |
+
"colab": {
|
612 |
+
"base_uri": "https://localhost:8080/",
|
613 |
+
"height": 538,
|
614 |
+
"referenced_widgets": [
|
615 |
+
"35c2527c83f94eb983b1ae730ed91747",
|
616 |
+
"2392776c72b94666abf1fd5f62549b87",
|
617 |
+
"1c57bab4fd2e47f1b90a3d6758fa77cb",
|
618 |
+
"ef48f3ab7d2c4cefbb4f6227d61d2424",
|
619 |
+
"9e13b105fc8a4207969a41055d0dae4e",
|
620 |
+
"b8e4bd68d27a4d6386814fede84da99e",
|
621 |
+
"94c0c32442fe47fb926be879602cdd1c",
|
622 |
+
"3b8f2fec5e3d4ab69855adf5cd7b0ffb",
|
623 |
+
"73a820a723814b8aba81c89bac03716e",
|
624 |
+
"1a95698310fa41f89c221f7d775fadf7",
|
625 |
+
"8dc5f8affc6e43329e63112a4e552fff"
|
626 |
+
]
|
627 |
+
},
|
628 |
+
"id": "DVwk_COZaU5e",
|
629 |
+
"outputId": "ed811938-63d2-4fac-9a19-fc9029f0e0f6"
|
630 |
+
},
|
631 |
+
"outputs": [],
|
632 |
+
"source": [
|
633 |
+
"# %%time\n",
|
634 |
+
"# inference = TextGenerationInference() # Create an instance with default settings\n",
|
635 |
+
"\n",
|
636 |
+
"# prompt = \"\"\"\n",
|
637 |
+
"# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
|
638 |
+
"\n",
|
639 |
+
"# ### Instruction:\n",
|
640 |
+
"# Continue the story based on the given starting sentence.\n",
|
641 |
+
"\n",
|
642 |
+
"# ### Input:\n",
|
643 |
+
"# Once upon a time,\n",
|
644 |
+
"# \"\"\"\n",
|
645 |
+
"\n",
|
646 |
+
"# response = inference.chat(prompt)\n",
|
647 |
+
"# print(response)"
|
648 |
+
]
|
649 |
+
},
|
650 |
+
{
|
651 |
+
"cell_type": "code",
|
652 |
+
"execution_count": 16,
|
653 |
+
"metadata": {},
|
654 |
+
"outputs": [],
|
655 |
+
"source": [
|
656 |
+
"import torch\n",
|
657 |
+
"torch.cuda.empty_cache()"
|
658 |
+
]
|
659 |
+
},
|
660 |
+
{
|
661 |
+
"cell_type": "code",
|
662 |
+
"execution_count": 15,
|
663 |
+
"metadata": {},
|
664 |
+
"outputs": [
|
665 |
+
{
|
666 |
+
"data": {
|
667 |
+
"application/vnd.jupyter.widget-view+json": {
|
668 |
+
"model_id": "7c38ada366954e7aa5d6456793d72b87",
|
669 |
+
"version_major": 2,
|
670 |
+
"version_minor": 0
|
671 |
+
},
|
672 |
+
"text/plain": [
|
673 |
+
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
|
674 |
+
]
|
675 |
+
},
|
676 |
+
"metadata": {},
|
677 |
+
"output_type": "display_data"
|
678 |
+
},
|
679 |
+
{
|
680 |
+
"name": "stderr",
|
681 |
+
"output_type": "stream",
|
682 |
+
"text": [
|
683 |
+
"You shouldn't move a model when it is dispatched on multiple devices.\n"
|
684 |
+
]
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"ename": "RuntimeError",
|
688 |
+
"evalue": "You can't move a model that has some modules offloaded to cpu or disk.",
|
689 |
+
"output_type": "error",
|
690 |
+
"traceback": [
|
691 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
692 |
+
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
|
693 |
+
"File \u001b[0;32m<timed exec>:1\u001b[0m\n",
|
694 |
+
"File \u001b[0;32m<string>:14\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, model_path, use_int4, use_int8, temperature, top_k, top_p, repetition_penalty, num_return_sequences, num_beams, max_new_tokens, do_sample)\u001b[0m\n",
|
695 |
+
"Cell \u001b[0;32mIn[14], line 23\u001b[0m, in \u001b[0;36mTextGenerationInference.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__post_init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 23\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_in_4bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int4\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_int8\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43mtorch_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat16\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mauto\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m 
\u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/azusers/work/Hemant/data\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# \"./data\"\u001b[39;49;00m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_path, trust_remote_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39meval()\n",
|
696 |
+
"File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:563\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 562\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 563\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 564\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 567\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m )\n",
|
697 |
+
"File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/modeling_utils.py:3253\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3250\u001b[0m model \u001b[38;5;241m=\u001b[39m quantizer\u001b[38;5;241m.\u001b[39mpost_init_model(model)\n\u001b[1;32m 3252\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _adapter_model_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 3253\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_adapter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3254\u001b[0m \u001b[43m \u001b[49m\u001b[43m_adapter_model_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3255\u001b[0m \u001b[43m \u001b[49m\u001b[43madapter_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madapter_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3256\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3257\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3258\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_loading_info:\n\u001b[1;32m 3261\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m loading_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
698 |
+
"File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:180\u001b[0m, in \u001b[0;36mPeftAdapterMixin.load_adapter\u001b[0;34m(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# Re-dispatch model and hooks in case the model is offloaded to CPU / Disk.\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 176\u001b[0m (\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhf_device_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 177\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhf_device_map\u001b[38;5;241m.\u001b[39mvalues())\u001b[38;5;241m.\u001b[39mintersection({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m})) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 178\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_config) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 179\u001b[0m ):\n\u001b[0;32m--> 180\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dispatch_accelerate_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 181\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmax_memory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_memory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 183\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
699 |
+
"File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/transformers/integrations/peft.py:390\u001b[0m, in \u001b[0;36mPeftAdapterMixin._dispatch_accelerate_model\u001b[0;34m(self, device_map, max_memory, offload_folder, offload_index)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(device_map, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 387\u001b[0m device_map \u001b[38;5;241m=\u001b[39m infer_auto_device_map(\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28mself\u001b[39m, max_memory\u001b[38;5;241m=\u001b[39mmax_memory, no_split_module_classes\u001b[38;5;241m=\u001b[39mno_split_module_classes\n\u001b[1;32m 389\u001b[0m )\n\u001b[0;32m--> 390\u001b[0m \u001b[43mdispatch_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 394\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdispatch_model_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 395\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
700 |
+
"File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:426\u001b[0m, in \u001b[0;36mdispatch_model\u001b[0;34m(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)\u001b[0m\n\u001b[1;32m 424\u001b[0m device \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(device_map\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m device \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisk\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 426\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 429\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 430\u001b[0m )\n",
|
701 |
+
"File \u001b[0;32m/data/anaconda/envs/py10/lib/python3.10/site-packages/accelerate/big_modeling.py:415\u001b[0m, in \u001b[0;36mdispatch_model.<locals>.add_warning.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m param \u001b[38;5;129;01min\u001b[39;00m model\u001b[38;5;241m.\u001b[39mparameters():\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m param\u001b[38;5;241m.\u001b[39mdevice \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmeta\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 415\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt move a model that has some modules offloaded to cpu or disk.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
702 |
+
"\u001b[0;31mRuntimeError\u001b[0m: You can't move a model that has some modules offloaded to cpu or disk."
|
703 |
+
]
|
704 |
+
}
|
705 |
+
],
|
706 |
+
"source": [
|
707 |
+
"%%time\n",
|
708 |
+
"inference = TextGenerationInference() # Create an instance with default settings\n",
|
709 |
+
"\n",
|
710 |
+
"# Prompt 1\n",
|
711 |
+
"# prompt = \"\"\"\n",
|
712 |
+
"# Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
|
713 |
+
"\n",
|
714 |
+
"# ### Instruction:\n",
|
715 |
+
"# Write the relevance for the given input description.\n",
|
716 |
+
"\n",
|
717 |
+
"# ### Input:\n",
|
718 |
+
"# Find out about the specified mapped drive\n",
|
719 |
+
"# \"\"\"\n",
|
720 |
+
"\n",
|
721 |
+
"# Prompt 2\n",
|
722 |
+
"# prompt = \"\"\"\n",
|
723 |
+
"# ### Instruction:\n",
|
724 |
+
"# Write the relevance for the given input description.\n",
|
725 |
+
"\n",
|
726 |
+
"# ### Input:\n",
|
727 |
+
"# Find out about the specified mapped drive.\n",
|
728 |
+
"# \"\"\"\n",
|
729 |
+
"\n",
|
730 |
+
"# Prompt 3\n",
|
731 |
+
"prompt = \"\"\"\n",
|
732 |
+
"<s>[INST] <<SYS>> Write the BigFixRelevance for the following description: Give the processor information <</SYS>> [/INST]\n",
|
733 |
+
"\"\"\"\n",
|
734 |
+
"\n",
|
735 |
+
"response = inference.chat(prompt)\n",
|
736 |
+
"print(response)"
|
737 |
+
]
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"cell_type": "code",
|
741 |
+
"execution_count": null,
|
742 |
+
"metadata": {},
|
743 |
+
"outputs": [],
|
744 |
+
"source": [
|
745 |
+
"relative_path = \"./data\"\n",
|
746 |
+
"absolute_path = os.path.abspath(relative_path)\n",
|
747 |
+
"print(absolute_path)"
|
748 |
+
]
|
749 |
+
}
|
750 |
+
],
|
751 |
+
"metadata": {
|
752 |
+
"accelerator": "GPU",
|
753 |
+
"colab": {
|
754 |
+
"gpuType": "T4",
|
755 |
+
"provenance": []
|
756 |
+
},
|
757 |
+
"kernelspec": {
|
758 |
+
"display_name": "Python (whatever you want to call it)",
|
759 |
+
"language": "python",
|
760 |
+
"name": "envname"
|
761 |
+
},
|
762 |
+
"language_info": {
|
763 |
+
"codemirror_mode": {
|
764 |
+
"name": "ipython",
|
765 |
+
"version": 3
|
766 |
+
},
|
767 |
+
"file_extension": ".py",
|
768 |
+
"mimetype": "text/x-python",
|
769 |
+
"name": "python",
|
770 |
+
"nbconvert_exporter": "python",
|
771 |
+
"pygments_lexer": "ipython3",
|
772 |
+
"version": "3.10.13"
|
773 |
+
},
|
774 |
+
"widgets": {
|
775 |
+
"application/vnd.jupyter.widget-state+json": {
|
776 |
+
"1a95698310fa41f89c221f7d775fadf7": {
|
777 |
+
"model_module": "@jupyter-widgets/base",
|
778 |
+
"model_module_version": "1.2.0",
|
779 |
+
"model_name": "LayoutModel",
|
780 |
+
"state": {
|
781 |
+
"_model_module": "@jupyter-widgets/base",
|
782 |
+
"_model_module_version": "1.2.0",
|
783 |
+
"_model_name": "LayoutModel",
|
784 |
+
"_view_count": null,
|
785 |
+
"_view_module": "@jupyter-widgets/base",
|
786 |
+
"_view_module_version": "1.2.0",
|
787 |
+
"_view_name": "LayoutView",
|
788 |
+
"align_content": null,
|
789 |
+
"align_items": null,
|
790 |
+
"align_self": null,
|
791 |
+
"border": null,
|
792 |
+
"bottom": null,
|
793 |
+
"display": null,
|
794 |
+
"flex": null,
|
795 |
+
"flex_flow": null,
|
796 |
+
"grid_area": null,
|
797 |
+
"grid_auto_columns": null,
|
798 |
+
"grid_auto_flow": null,
|
799 |
+
"grid_auto_rows": null,
|
800 |
+
"grid_column": null,
|
801 |
+
"grid_gap": null,
|
802 |
+
"grid_row": null,
|
803 |
+
"grid_template_areas": null,
|
804 |
+
"grid_template_columns": null,
|
805 |
+
"grid_template_rows": null,
|
806 |
+
"height": null,
|
807 |
+
"justify_content": null,
|
808 |
+
"justify_items": null,
|
809 |
+
"left": null,
|
810 |
+
"margin": null,
|
811 |
+
"max_height": null,
|
812 |
+
"max_width": null,
|
813 |
+
"min_height": null,
|
814 |
+
"min_width": null,
|
815 |
+
"object_fit": null,
|
816 |
+
"object_position": null,
|
817 |
+
"order": null,
|
818 |
+
"overflow": null,
|
819 |
+
"overflow_x": null,
|
820 |
+
"overflow_y": null,
|
821 |
+
"padding": null,
|
822 |
+
"right": null,
|
823 |
+
"top": null,
|
824 |
+
"visibility": null,
|
825 |
+
"width": null
|
826 |
+
}
|
827 |
+
},
|
828 |
+
"1c57bab4fd2e47f1b90a3d6758fa77cb": {
|
829 |
+
"model_module": "@jupyter-widgets/controls",
|
830 |
+
"model_module_version": "1.5.0",
|
831 |
+
"model_name": "FloatProgressModel",
|
832 |
+
"state": {
|
833 |
+
"_dom_classes": [],
|
834 |
+
"_model_module": "@jupyter-widgets/controls",
|
835 |
+
"_model_module_version": "1.5.0",
|
836 |
+
"_model_name": "FloatProgressModel",
|
837 |
+
"_view_count": null,
|
838 |
+
"_view_module": "@jupyter-widgets/controls",
|
839 |
+
"_view_module_version": "1.5.0",
|
840 |
+
"_view_name": "ProgressView",
|
841 |
+
"bar_style": "success",
|
842 |
+
"description": "",
|
843 |
+
"description_tooltip": null,
|
844 |
+
"layout": "IPY_MODEL_3b8f2fec5e3d4ab69855adf5cd7b0ffb",
|
845 |
+
"max": 10,
|
846 |
+
"min": 0,
|
847 |
+
"orientation": "horizontal",
|
848 |
+
"style": "IPY_MODEL_73a820a723814b8aba81c89bac03716e",
|
849 |
+
"value": 10
|
850 |
+
}
|
851 |
+
},
|
852 |
+
"2392776c72b94666abf1fd5f62549b87": {
|
853 |
+
"model_module": "@jupyter-widgets/controls",
|
854 |
+
"model_module_version": "1.5.0",
|
855 |
+
"model_name": "HTMLModel",
|
856 |
+
"state": {
|
857 |
+
"_dom_classes": [],
|
858 |
+
"_model_module": "@jupyter-widgets/controls",
|
859 |
+
"_model_module_version": "1.5.0",
|
860 |
+
"_model_name": "HTMLModel",
|
861 |
+
"_view_count": null,
|
862 |
+
"_view_module": "@jupyter-widgets/controls",
|
863 |
+
"_view_module_version": "1.5.0",
|
864 |
+
"_view_name": "HTMLView",
|
865 |
+
"description": "",
|
866 |
+
"description_tooltip": null,
|
867 |
+
"layout": "IPY_MODEL_b8e4bd68d27a4d6386814fede84da99e",
|
868 |
+
"placeholder": "",
|
869 |
+
"style": "IPY_MODEL_94c0c32442fe47fb926be879602cdd1c",
|
870 |
+
"value": "Loading checkpoint shards: 100%"
|
871 |
+
}
|
872 |
+
},
|
873 |
+
"35c2527c83f94eb983b1ae730ed91747": {
|
874 |
+
"model_module": "@jupyter-widgets/controls",
|
875 |
+
"model_module_version": "1.5.0",
|
876 |
+
"model_name": "HBoxModel",
|
877 |
+
"state": {
|
878 |
+
"_dom_classes": [],
|
879 |
+
"_model_module": "@jupyter-widgets/controls",
|
880 |
+
"_model_module_version": "1.5.0",
|
881 |
+
"_model_name": "HBoxModel",
|
882 |
+
"_view_count": null,
|
883 |
+
"_view_module": "@jupyter-widgets/controls",
|
884 |
+
"_view_module_version": "1.5.0",
|
885 |
+
"_view_name": "HBoxView",
|
886 |
+
"box_style": "",
|
887 |
+
"children": [
|
888 |
+
"IPY_MODEL_2392776c72b94666abf1fd5f62549b87",
|
889 |
+
"IPY_MODEL_1c57bab4fd2e47f1b90a3d6758fa77cb",
|
890 |
+
"IPY_MODEL_ef48f3ab7d2c4cefbb4f6227d61d2424"
|
891 |
+
],
|
892 |
+
"layout": "IPY_MODEL_9e13b105fc8a4207969a41055d0dae4e"
|
893 |
+
}
|
894 |
+
},
|
895 |
+
"3b8f2fec5e3d4ab69855adf5cd7b0ffb": {
|
896 |
+
"model_module": "@jupyter-widgets/base",
|
897 |
+
"model_module_version": "1.2.0",
|
898 |
+
"model_name": "LayoutModel",
|
899 |
+
"state": {
|
900 |
+
"_model_module": "@jupyter-widgets/base",
|
901 |
+
"_model_module_version": "1.2.0",
|
902 |
+
"_model_name": "LayoutModel",
|
903 |
+
"_view_count": null,
|
904 |
+
"_view_module": "@jupyter-widgets/base",
|
905 |
+
"_view_module_version": "1.2.0",
|
906 |
+
"_view_name": "LayoutView",
|
907 |
+
"align_content": null,
|
908 |
+
"align_items": null,
|
909 |
+
"align_self": null,
|
910 |
+
"border": null,
|
911 |
+
"bottom": null,
|
912 |
+
"display": null,
|
913 |
+
"flex": null,
|
914 |
+
"flex_flow": null,
|
915 |
+
"grid_area": null,
|
916 |
+
"grid_auto_columns": null,
|
917 |
+
"grid_auto_flow": null,
|
918 |
+
"grid_auto_rows": null,
|
919 |
+
"grid_column": null,
|
920 |
+
"grid_gap": null,
|
921 |
+
"grid_row": null,
|
922 |
+
"grid_template_areas": null,
|
923 |
+
"grid_template_columns": null,
|
924 |
+
"grid_template_rows": null,
|
925 |
+
"height": null,
|
926 |
+
"justify_content": null,
|
927 |
+
"justify_items": null,
|
928 |
+
"left": null,
|
929 |
+
"margin": null,
|
930 |
+
"max_height": null,
|
931 |
+
"max_width": null,
|
932 |
+
"min_height": null,
|
933 |
+
"min_width": null,
|
934 |
+
"object_fit": null,
|
935 |
+
"object_position": null,
|
936 |
+
"order": null,
|
937 |
+
"overflow": null,
|
938 |
+
"overflow_x": null,
|
939 |
+
"overflow_y": null,
|
940 |
+
"padding": null,
|
941 |
+
"right": null,
|
942 |
+
"top": null,
|
943 |
+
"visibility": null,
|
944 |
+
"width": null
|
945 |
+
}
|
946 |
+
},
|
947 |
+
"73a820a723814b8aba81c89bac03716e": {
|
948 |
+
"model_module": "@jupyter-widgets/controls",
|
949 |
+
"model_module_version": "1.5.0",
|
950 |
+
"model_name": "ProgressStyleModel",
|
951 |
+
"state": {
|
952 |
+
"_model_module": "@jupyter-widgets/controls",
|
953 |
+
"_model_module_version": "1.5.0",
|
954 |
+
"_model_name": "ProgressStyleModel",
|
955 |
+
"_view_count": null,
|
956 |
+
"_view_module": "@jupyter-widgets/base",
|
957 |
+
"_view_module_version": "1.2.0",
|
958 |
+
"_view_name": "StyleView",
|
959 |
+
"bar_color": null,
|
960 |
+
"description_width": ""
|
961 |
+
}
|
962 |
+
},
|
963 |
+
"8dc5f8affc6e43329e63112a4e552fff": {
|
964 |
+
"model_module": "@jupyter-widgets/controls",
|
965 |
+
"model_module_version": "1.5.0",
|
966 |
+
"model_name": "DescriptionStyleModel",
|
967 |
+
"state": {
|
968 |
+
"_model_module": "@jupyter-widgets/controls",
|
969 |
+
"_model_module_version": "1.5.0",
|
970 |
+
"_model_name": "DescriptionStyleModel",
|
971 |
+
"_view_count": null,
|
972 |
+
"_view_module": "@jupyter-widgets/base",
|
973 |
+
"_view_module_version": "1.2.0",
|
974 |
+
"_view_name": "StyleView",
|
975 |
+
"description_width": ""
|
976 |
+
}
|
977 |
+
},
|
978 |
+
"94c0c32442fe47fb926be879602cdd1c": {
|
979 |
+
"model_module": "@jupyter-widgets/controls",
|
980 |
+
"model_module_version": "1.5.0",
|
981 |
+
"model_name": "DescriptionStyleModel",
|
982 |
+
"state": {
|
983 |
+
"_model_module": "@jupyter-widgets/controls",
|
984 |
+
"_model_module_version": "1.5.0",
|
985 |
+
"_model_name": "DescriptionStyleModel",
|
986 |
+
"_view_count": null,
|
987 |
+
"_view_module": "@jupyter-widgets/base",
|
988 |
+
"_view_module_version": "1.2.0",
|
989 |
+
"_view_name": "StyleView",
|
990 |
+
"description_width": ""
|
991 |
+
}
|
992 |
+
},
|
993 |
+
"9e13b105fc8a4207969a41055d0dae4e": {
|
994 |
+
"model_module": "@jupyter-widgets/base",
|
995 |
+
"model_module_version": "1.2.0",
|
996 |
+
"model_name": "LayoutModel",
|
997 |
+
"state": {
|
998 |
+
"_model_module": "@jupyter-widgets/base",
|
999 |
+
"_model_module_version": "1.2.0",
|
1000 |
+
"_model_name": "LayoutModel",
|
1001 |
+
"_view_count": null,
|
1002 |
+
"_view_module": "@jupyter-widgets/base",
|
1003 |
+
"_view_module_version": "1.2.0",
|
1004 |
+
"_view_name": "LayoutView",
|
1005 |
+
"align_content": null,
|
1006 |
+
"align_items": null,
|
1007 |
+
"align_self": null,
|
1008 |
+
"border": null,
|
1009 |
+
"bottom": null,
|
1010 |
+
"display": null,
|
1011 |
+
"flex": null,
|
1012 |
+
"flex_flow": null,
|
1013 |
+
"grid_area": null,
|
1014 |
+
"grid_auto_columns": null,
|
1015 |
+
"grid_auto_flow": null,
|
1016 |
+
"grid_auto_rows": null,
|
1017 |
+
"grid_column": null,
|
1018 |
+
"grid_gap": null,
|
1019 |
+
"grid_row": null,
|
1020 |
+
"grid_template_areas": null,
|
1021 |
+
"grid_template_columns": null,
|
1022 |
+
"grid_template_rows": null,
|
1023 |
+
"height": null,
|
1024 |
+
"justify_content": null,
|
1025 |
+
"justify_items": null,
|
1026 |
+
"left": null,
|
1027 |
+
"margin": null,
|
1028 |
+
"max_height": null,
|
1029 |
+
"max_width": null,
|
1030 |
+
"min_height": null,
|
1031 |
+
"min_width": null,
|
1032 |
+
"object_fit": null,
|
1033 |
+
"object_position": null,
|
1034 |
+
"order": null,
|
1035 |
+
"overflow": null,
|
1036 |
+
"overflow_x": null,
|
1037 |
+
"overflow_y": null,
|
1038 |
+
"padding": null,
|
1039 |
+
"right": null,
|
1040 |
+
"top": null,
|
1041 |
+
"visibility": null,
|
1042 |
+
"width": null
|
1043 |
+
}
|
1044 |
+
},
|
1045 |
+
"b8e4bd68d27a4d6386814fede84da99e": {
|
1046 |
+
"model_module": "@jupyter-widgets/base",
|
1047 |
+
"model_module_version": "1.2.0",
|
1048 |
+
"model_name": "LayoutModel",
|
1049 |
+
"state": {
|
1050 |
+
"_model_module": "@jupyter-widgets/base",
|
1051 |
+
"_model_module_version": "1.2.0",
|
1052 |
+
"_model_name": "LayoutModel",
|
1053 |
+
"_view_count": null,
|
1054 |
+
"_view_module": "@jupyter-widgets/base",
|
1055 |
+
"_view_module_version": "1.2.0",
|
1056 |
+
"_view_name": "LayoutView",
|
1057 |
+
"align_content": null,
|
1058 |
+
"align_items": null,
|
1059 |
+
"align_self": null,
|
1060 |
+
"border": null,
|
1061 |
+
"bottom": null,
|
1062 |
+
"display": null,
|
1063 |
+
"flex": null,
|
1064 |
+
"flex_flow": null,
|
1065 |
+
"grid_area": null,
|
1066 |
+
"grid_auto_columns": null,
|
1067 |
+
"grid_auto_flow": null,
|
1068 |
+
"grid_auto_rows": null,
|
1069 |
+
"grid_column": null,
|
1070 |
+
"grid_gap": null,
|
1071 |
+
"grid_row": null,
|
1072 |
+
"grid_template_areas": null,
|
1073 |
+
"grid_template_columns": null,
|
1074 |
+
"grid_template_rows": null,
|
1075 |
+
"height": null,
|
1076 |
+
"justify_content": null,
|
1077 |
+
"justify_items": null,
|
1078 |
+
"left": null,
|
1079 |
+
"margin": null,
|
1080 |
+
"max_height": null,
|
1081 |
+
"max_width": null,
|
1082 |
+
"min_height": null,
|
1083 |
+
"min_width": null,
|
1084 |
+
"object_fit": null,
|
1085 |
+
"object_position": null,
|
1086 |
+
"order": null,
|
1087 |
+
"overflow": null,
|
1088 |
+
"overflow_x": null,
|
1089 |
+
"overflow_y": null,
|
1090 |
+
"padding": null,
|
1091 |
+
"right": null,
|
1092 |
+
"top": null,
|
1093 |
+
"visibility": null,
|
1094 |
+
"width": null
|
1095 |
+
}
|
1096 |
+
},
|
1097 |
+
"ef48f3ab7d2c4cefbb4f6227d61d2424": {
|
1098 |
+
"model_module": "@jupyter-widgets/controls",
|
1099 |
+
"model_module_version": "1.5.0",
|
1100 |
+
"model_name": "HTMLModel",
|
1101 |
+
"state": {
|
1102 |
+
"_dom_classes": [],
|
1103 |
+
"_model_module": "@jupyter-widgets/controls",
|
1104 |
+
"_model_module_version": "1.5.0",
|
1105 |
+
"_model_name": "HTMLModel",
|
1106 |
+
"_view_count": null,
|
1107 |
+
"_view_module": "@jupyter-widgets/controls",
|
1108 |
+
"_view_module_version": "1.5.0",
|
1109 |
+
"_view_name": "HTMLView",
|
1110 |
+
"description": "",
|
1111 |
+
"description_tooltip": null,
|
1112 |
+
"layout": "IPY_MODEL_1a95698310fa41f89c221f7d775fadf7",
|
1113 |
+
"placeholder": "",
|
1114 |
+
"style": "IPY_MODEL_8dc5f8affc6e43329e63112a4e552fff",
|
1115 |
+
"value": " 10/10 [02:43<00:00, 12.78s/it]"
|
1116 |
+
}
|
1117 |
+
}
|
1118 |
+
}
|
1119 |
+
}
|
1120 |
+
},
|
1121 |
+
"nbformat": 4,
|
1122 |
+
"nbformat_minor": 1
|
1123 |
+
}
|