PEFT
Portuguese
skoll520 committed on
Commit 82f69a2
1 Parent(s): b7922e4

Upload 4 files


adapter model (LoRA)

adapter_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "openlm-research/open_llama_3b_v2",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
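
For reference, a minimal sketch of loading this adapter with PEFT, assuming the two files in this commit sit in a local directory named ./adapter (hypothetical; the Hub repo id may differ). The base model name is taken from base_model_name_or_path above:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model named in adapter_config.json
base = AutoModelForCausalLM.from_pretrained(
    "openlm-research/open_llama_3b_v2",
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(
    "openlm-research/open_llama_3b_v2", use_fast=False
)
# "./adapter" is a hypothetical directory holding adapter_config.json
# and adapter_model.bin from this commit
model = PeftModel.from_pretrained(base, "./adapter")
model.eval()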
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d68e85dfb62b16ac694dfb355f93b4bf0958f069735603080378b9ca4c6da9b
+ size 10686701
inference-cabra-kaggle.ipynb ADDED
The diff for this file is too large to render. See raw diff
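
Since that diff is not rendered, here is a minimal generation sketch, continuing from the loading sketch above; the question is a made-up example, and the prompt reuses the no-context instruction template from the training notebook below:

# Prompt in the training template's no-context format (Portuguese, as trained)
prompt = (
    "Abaixo está uma instrução que descreve uma tarefa. "
    "Escreva uma resposta que complete adequadamente o pedido.\n"
    "### Instrução:\n"
    "Quem foi Santos Dumont?\n"  # hypothetical example instruction
    "### Resposta:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))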
 
qLora-Training-Cabra - Paperspace.ipynb ADDED
@@ -0,0 +1,809 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "UQF7nAH1syz4"
+ },
+ "source": [
+ "Based on [alpaca lora](https://github.com/tloen/alpaca-lora/blob/main/finetune.py)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T11:49:03.267959Z",
+ "iopub.status.busy": "2023-07-18T11:49:03.267686Z",
+ "iopub.status.idle": "2023-07-18T11:51:38.082879Z",
+ "shell.execute_reply": "2023-07-18T11:51:38.082079Z",
+ "shell.execute_reply.started": "2023-07-18T11:49:03.267936Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# !apt update\n",
+ "# !apt upgrade -y cuda-nvcc-12-0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T11:51:38.084514Z",
+ "iopub.status.busy": "2023-07-18T11:51:38.084291Z",
+ "iopub.status.idle": "2023-07-18T11:51:40.683284Z",
+ "shell.execute_reply": "2023-07-18T11:51:40.682579Z",
+ "shell.execute_reply.started": "2023-07-18T11:51:38.084488Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "print(\"Torch Version: \" + torch.__version__ + \"\\n\")\n",
+ "\n",
+ "# !nvcc --version\n",
+ "\n",
+ "# !nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "execution": {
+ "iopub.execute_input": "2023-07-18T11:51:40.684754Z",
+ "iopub.status.busy": "2023-07-18T11:51:40.684408Z",
+ "iopub.status.idle": "2023-07-18T11:55:27.915935Z",
+ "shell.execute_reply": "2023-07-18T11:55:27.915414Z",
+ "shell.execute_reply.started": "2023-07-18T11:51:40.684734Z"
+ },
+ "id": "RXurA0q5jtaf",
+ "outputId": "93942094-5399-4f21-9660-fbfd344598ee"
+ },
+ "outputs": [],
+ "source": [
+ "# !pip install -U cuda-python\n",
+ "# !pip3 install -U torch torchvision torchaudio #--index-url https://download.pytorch.org/whl/cu118\n",
+ "\n",
+ "# # Paperspace\n",
+ "# !git clone https://github.com/timdettmers/bitsandbytes.git\n",
+ "# !cd bitsandbytes && CUDA_VERSION=116 make cuda11x && python setup.py install\n",
+ "# !cp /notebooks/bitsandbytes/bitsandbytes/libbitsandbytes_cuda116.so /usr/lib/python3.9/\n",
+ "# !pip install -U bitsandbytes\n",
+ "\n",
+ "# # Google Colab\n",
+ "# #!pip install -U git+https://github.com/TimDettmers/bitsandbytes\n",
+ "\n",
+ "# !pip install -U git+https://github.com/huggingface/transformers.git\n",
+ "# !pip install -U git+https://github.com/huggingface/peft.git\n",
+ "# !pip install -U datasets accelerate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T01:41:01.400102Z",
+ "iopub.status.busy": "2023-07-18T01:41:01.399691Z",
+ "iopub.status.idle": "2023-07-18T01:41:10.921838Z",
+ "shell.execute_reply": "2023-07-18T01:41:10.920427Z",
+ "shell.execute_reply.started": "2023-07-18T01:41:01.400069Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "#!find / -name bitsandbytes\n",
+ "\n",
+ "#!find / -name libbitsandbytes_cuda116.so\n",
+ "\n",
+ "#!cp /notebooks/bitsandbytes/bitsandbytes/libbitsandbytes_cuda116.so /usr/lib/python3.9/\n",
+ "\n",
+ "#!ls /usr/lib/python3.9/\n",
+ "\n",
+ "#!python -m bitsandbytes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "execution": {
+ "iopub.execute_input": "2023-07-18T11:55:56.450496Z",
+ "iopub.status.busy": "2023-07-18T11:55:56.449938Z",
+ "iopub.status.idle": "2023-07-18T11:56:04.138278Z",
+ "shell.execute_reply": "2023-07-18T11:56:04.137468Z",
+ "shell.execute_reply.started": "2023-07-18T11:55:56.450472Z"
+ },
+ "id": "fhmLLJD0lM5S",
+ "outputId": "d1ef4a8d-156c-4e0c-b92c-2d499e8ad4ed"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "# To choose a specific GPU:\n",
+ "# os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
+ "\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import bitsandbytes as bnb\n",
+ "from datasets import load_dataset\n",
+ "import transformers\n",
+ "from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, AutoModelForCausalLM\n",
+ "from peft import prepare_model_for_kbit_training, prepare_model_for_int8_training, LoraConfig, get_peft_model\n",
+ "from peft.peft_model import PeftModel\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T11:56:06.901323Z",
+ "iopub.status.busy": "2023-07-18T11:56:06.900588Z",
+ "iopub.status.idle": "2023-07-18T11:56:06.904953Z",
+ "shell.execute_reply": "2023-07-18T11:56:06.904367Z",
+ "shell.execute_reply.started": "2023-07-18T11:56:06.901289Z"
+ },
+ "id": "XnTp0gOUlOCU"
+ },
+ "outputs": [],
+ "source": [
+ "MICRO_BATCH_SIZE = 6 # per-device batch size\n",
+ "BATCH_SIZE = 128\n",
+ "GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE\n",
+ "EPOCHS = 2\n",
+ "LEARNING_RATE = 3e-4 # the Karpathy constant\n",
+ "CUTOFF_LEN = 256\n",
+ "LORA_R = 8\n",
+ "LORA_ALPHA = 16\n",
+ "LORA_DROPOUT = 0.05"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 49,
+ "referenced_widgets": [
+ "f3fb12d97dee43aaa51784182caed544",
+ "03717756681149898e8c3d40cbc16d10",
+ "d718c5abc21b4394a4314fdc289d830d",
+ "ee8532b0710541abb5f208e654b6d55a",
+ "af52a9b6412e46c6b82acb9afabd12d9",
+ "109d32c6d765417daf49b425b7ccee68",
+ "56217a5954624e6f8742ba914183cb9e",
+ "42a0b19d55b24e4ea5ce7894e4b8df50",
+ "4348c4efdd4d4deb945bb2838c24cd83",
+ "8f860f6f760e49b889fc450c53e49ac5",
+ "43c5c35af8f44f1f98def57ea60a9615"
+ ]
+ },
+ "execution": {
+ "iopub.execute_input": "2023-07-18T11:56:08.096435Z",
+ "iopub.status.busy": "2023-07-18T11:56:08.095783Z",
+ "iopub.status.idle": "2023-07-18T12:05:29.403294Z",
+ "shell.execute_reply": "2023-07-18T12:05:29.402714Z",
+ "shell.execute_reply.started": "2023-07-18T11:56:08.096410Z"
+ },
+ "id": "vdQfvhHo0afo",
+ "outputId": "71768c73-92d7-4430-cd75-3b262d86fc9b"
+ },
+ "outputs": [],
+ "source": [
+ "from huggingface_hub import snapshot_download\n",
+ "\n",
+ "model = \"openlm-research/open_llama_3b_v2\"\n",
+ "# alternative base model: \"VMware/open-llama-13b-open-instruct\"\n",
+ "use_fast_tokenizer = False\n",
+ "# snapshot_download(repo_id=model)\n",
+ "\n",
+ "# LlamaTokenizer is faster if the model is a Llama variant:\n",
+ "# tokenizer = LlamaTokenizer.from_pretrained(model, use_fast=use_fast_tokenizer)\n",
+ "# For other models:\n",
+ "tokenizer = AutoTokenizer.from_pretrained(model, use_fast=use_fast_tokenizer)\n",
+ "\n",
+ "# model = LlamaForCausalLM.from_pretrained(model, load_in_8bit=True, low_cpu_mem_usage=True, device_map='auto', torch_dtype=torch.float16)\n",
+ "# For other models:\n",
+ "model = AutoModelForCausalLM.from_pretrained(model, load_in_8bit=True, low_cpu_mem_usage=True, device_map='auto', torch_dtype=torch.float16)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T12:05:29.404911Z",
+ "iopub.status.busy": "2023-07-18T12:05:29.404341Z",
+ "iopub.status.idle": "2023-07-18T12:05:43.916286Z",
+ "shell.execute_reply": "2023-07-18T12:05:43.915849Z",
+ "shell.execute_reply.started": "2023-07-18T12:05:29.404890Z"
+ },
+ "id": "Xkb9pQTflS-b"
+ },
+ "outputs": [],
+ "source": [
+ "model = prepare_model_for_int8_training(model)\n",
+ "\n",
+ "config = LoraConfig(\n",
+ " r=LORA_R,\n",
+ " lora_alpha=LORA_ALPHA,\n",
+ " target_modules=[\"q_proj\", \"v_proj\"],\n",
+ " lora_dropout=LORA_DROPOUT,\n",
+ " bias=\"none\",\n",
+ " task_type=\"CAUSAL_LM\",\n",
+ ")\n",
+ "model = get_peft_model(model, config)\n",
+ "# model = PeftModel.from_pretrained(model, \"open-llama-3bv2-lora-cabra-adapter-120steps\", config=config)\n",
+ "tokenizer.pad_token_id = 0 # unk; we want this to be different from the eos token\n",
+ "data = load_dataset(\"json\", data_files=\"https://huggingface.co/datasets/Gustrd/dolly-15k-libretranslate-pt/resolve/main/dolly-15k-libretranslate-pt.json\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T12:05:43.917470Z",
+ "iopub.status.busy": "2023-07-18T12:05:43.916924Z",
+ "iopub.status.idle": "2023-07-18T12:05:43.929680Z",
+ "shell.execute_reply": "2023-07-18T12:05:43.929220Z",
+ "shell.execute_reply.started": "2023-07-18T12:05:43.917450Z"
+ },
+ "id": "ad0PFPPmFRMv"
+ },
+ "outputs": [],
+ "source": [
+ "import math\n",
+ "\n",
+ "# Create a slice of the dataset to handle time constraints.\n",
+ "# dataSliceNumber is the fraction of rows to keep; 1 keeps the full dataset.\n",
+ "dataSliceNumber = 1\n",
+ "num_rows = math.ceil(len(data['train']) / (1 / dataSliceNumber))\n",
+ "data['train'] = data['train'].shuffle().select(range(num_rows))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T12:05:43.931063Z",
+ "iopub.status.busy": "2023-07-18T12:05:43.930761Z",
+ "iopub.status.idle": "2023-07-18T12:05:43.935033Z",
+ "shell.execute_reply": "2023-07-18T12:05:43.934613Z",
+ "shell.execute_reply.started": "2023-07-18T12:05:43.931044Z"
+ },
+ "id": "_VCfL3BhlV_x"
+ },
+ "outputs": [],
+ "source": [
+ "def generate_prompt(data_point):\n",
+ " # sorry for the formatting mess, need to move fast\n",
+ " if data_point[\"context\"]:\n",
+ " return f\"\"\"Abaixo está uma instrução que descreve uma tarefa, juntamente com uma entrada que fornece mais contexto. Escreva uma resposta que complete adequadamente o pedido.\n",
+ "### Instrução:\n",
+ "{data_point[\"instruction\"]}\n",
+ "### Entrada:\n",
+ "{data_point[\"context\"]}\n",
+ "### Resposta:\n",
+ "{data_point[\"response\"]}\"\"\"\n",
+ " else:\n",
+ " return f\"\"\"Abaixo está uma instrução que descreve uma tarefa. Escreva uma resposta que complete adequadamente o pedido.\n",
+ "### Instrução:\n",
+ "{data_point[\"instruction\"]}\n",
+ "### Resposta:\n",
+ "{data_point[\"response\"]}\"\"\"\n",
+ "\n",
+ "def tokenize(prompt):\n",
+ " # there's probably a way to do this with the tokenizer settings\n",
+ " # but again, gotta move fast\n",
+ " result = tokenizer(\n",
+ " prompt,\n",
+ " truncation=True,\n",
+ " max_length=CUTOFF_LEN + 1,\n",
+ " padding=\"max_length\",\n",
+ " )\n",
+ " return {\n",
+ " \"input_ids\": result[\"input_ids\"][:-1],\n",
+ " \"attention_mask\": result[\"attention_mask\"][:-1],\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T12:05:43.936195Z",
+ "iopub.status.busy": "2023-07-18T12:05:43.935598Z",
+ "iopub.status.idle": "2023-07-18T12:06:03.137794Z",
+ "shell.execute_reply": "2023-07-18T12:06:03.136986Z",
+ "shell.execute_reply.started": "2023-07-18T12:05:43.936177Z"
+ },
+ "id": "81oSm3GL9z72"
+ },
+ "outputs": [],
+ "source": [
+ "data = data.shuffle().map(lambda x: tokenize(generate_prompt(x)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T12:06:03.139179Z",
+ "iopub.status.busy": "2023-07-18T12:06:03.138947Z",
+ "iopub.status.idle": "2023-07-18T12:06:03.292197Z",
+ "shell.execute_reply": "2023-07-18T12:06:03.291371Z",
+ "shell.execute_reply.started": "2023-07-18T12:06:03.139159Z"
+ },
+ "id": "INGJJZ6dkpJu"
+ },
+ "outputs": [],
+ "source": [
+ "trainer = transformers.Trainer(\n",
+ " model=model,\n",
+ " train_dataset=data[\"train\"],\n",
+ " args=transformers.TrainingArguments(\n",
+ " per_device_train_batch_size=MICRO_BATCH_SIZE,\n",
+ " gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n",
+ " warmup_steps=100,\n",
+ " num_train_epochs=EPOCHS,\n",
+ " learning_rate=LEARNING_RATE,\n",
+ " fp16=True,\n",
+ " logging_steps=20,\n",
+ " output_dir=\"lora-cabra-3Bv2\",\n",
+ " save_total_limit=4,\n",
+ " save_steps=20\n",
+ " ),\n",
+ " data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
+ ")\n",
+ "model.config.use_cache = False\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T12:06:03.293628Z",
+ "iopub.status.busy": "2023-07-18T12:06:03.293172Z",
+ "iopub.status.idle": "2023-07-18T19:30:15.565676Z",
+ "shell.execute_reply": "2023-07-18T19:30:15.564948Z",
+ "shell.execute_reply.started": "2023-07-18T12:06:03.293609Z"
+ },
+ "id": "XrdM-8F8_59v"
+ },
+ "outputs": [],
+ "source": [
+ "\n",
+ "trainer.train(resume_from_checkpoint=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T19:30:15.567903Z",
+ "iopub.status.busy": "2023-07-18T19:30:15.567445Z",
+ "iopub.status.idle": "2023-07-18T19:30:15.617103Z",
+ "shell.execute_reply": "2023-07-18T19:30:15.616512Z",
+ "shell.execute_reply.started": "2023-07-18T19:30:15.567879Z"
+ },
+ "id": "3JY4QEi7lXyY"
+ },
+ "outputs": [],
+ "source": [
+ "model.save_pretrained(\"open-llama-3bv2-lora-cabra-adapter-140steps\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-18T19:45:30.280575Z",
+ "iopub.status.busy": "2023-07-18T19:45:30.280294Z",
+ "iopub.status.idle": "2023-07-18T19:45:31.880370Z",
+ "shell.execute_reply": "2023-07-18T19:45:31.879581Z",
+ "shell.execute_reply.started": "2023-07-18T19:45:30.280556Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# !tar -czvf open-llama-13b-lora-cabra-adapter.tar.gz ./open-llama-13b-lora-cabra-adapter"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuClass": "premium",
+ "gpuType": "T4",
+ "machine_shape": "hm",
+ "provenance": []
+ },
+ "gpuClass": "premium",
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "03717756681149898e8c3d40cbc16d10": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_109d32c6d765417daf49b425b7ccee68",
+ "placeholder": "​",
+ "style": "IPY_MODEL_56217a5954624e6f8742ba914183cb9e",
+ "value": "Loading checkpoint shards: 0%"
+ }
+ },
+ "109d32c6d765417daf49b425b7ccee68": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "42a0b19d55b24e4ea5ce7894e4b8df50": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4348c4efdd4d4deb945bb2838c24cd83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "43c5c35af8f44f1f98def57ea60a9615": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "56217a5954624e6f8742ba914183cb9e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "8f860f6f760e49b889fc450c53e49ac5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "af52a9b6412e46c6b82acb9afabd12d9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d718c5abc21b4394a4314fdc289d830d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_42a0b19d55b24e4ea5ce7894e4b8df50",
+ "max": 2,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4348c4efdd4d4deb945bb2838c24cd83",
+ "value": 0
+ }
+ },
+ "ee8532b0710541abb5f208e654b6d55a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8f860f6f760e49b889fc450c53e49ac5",
+ "placeholder": "​",
+ "style": "IPY_MODEL_43c5c35af8f44f1f98def57ea60a9615",
+ "value": " 0/2 [00:00&lt;?, ?it/s]"
+ }
+ },
+ "f3fb12d97dee43aaa51784182caed544": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_03717756681149898e8c3d40cbc16d10",
+ "IPY_MODEL_d718c5abc21b4394a4314fdc289d830d",
+ "IPY_MODEL_ee8532b0710541abb5f208e654b6d55a"
+ ],
+ "layout": "IPY_MODEL_af52a9b6412e46c6b82acb9afabd12d9"
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+ }
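
As a possible follow-up (not shown in the uploaded notebooks), the saved LoRA adapter can be folded into the base weights with PEFT's merge_and_unload, so inference no longer needs the peft dependency; the output directory name here is hypothetical:

# "model" is the PeftModel from the loading sketch near the top of this page
merged = model.merge_and_unload()  # fold the LoRA deltas into the base weights
merged.save_pretrained("open-llama-3bv2-cabra-merged")
tokenizer.save_pretrained("open-llama-3bv2-cabra-merged")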