nielsbantilan
committed on
Commit
•
0234203
1
Parent(s):
c4d63e4
Upload folder using huggingface_hub
Browse files
- README.md +1 -14
- adapter_config.json +1 -1
- checkpoint-250/adapter_model/adapter_config.json +1 -1
- checkpoint-250/optimizer.pt +1 -1
- checkpoint-250/rng_state_0.pth +1 -1
- checkpoint-250/rng_state_2.pth +1 -1
- checkpoint-250/rng_state_3.pth +1 -1
- checkpoint-250/rng_state_4.pth +1 -1
- checkpoint-250/rng_state_5.pth +1 -1
- checkpoint-250/rng_state_6.pth +1 -1
- checkpoint-250/rng_state_7.pth +1 -1
- checkpoint-250/scaler.pt +1 -1
- checkpoint-250/scheduler.pt +1 -1
- checkpoint-250/trainer_state.json +24 -24
- checkpoint-250/training_args.bin +1 -1
- flyte_training_config.json +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,16 +1,3 @@
|
|
1 |
---
|
2 |
-
|
3 |
-
- yahma/alpaca-cleaned
|
4 |
-
language:
|
5 |
-
- en
|
6 |
-
license: apache-2.0
|
7 |
-
tags:
|
8 |
-
- pytorch
|
9 |
-
- causal-lm
|
10 |
-
- llama2
|
11 |
-
- fine-tuning
|
12 |
-
- alpaca
|
13 |
-
|
14 |
---
|
15 |
-
|
16 |
-
# Llama-2-7B fine-tuned on LoRA alpaca-cleaned
|
|
|
1 |
---
|
2 |
+
library_name: peft
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
adapter_config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"base_model_name_or_path": "
|
3 |
"bias": "none",
|
4 |
"fan_in_fan_out": false,
|
5 |
"inference_mode": true,
|
|
|
1 |
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
|
3 |
"bias": "none",
|
4 |
"fan_in_fan_out": false,
|
5 |
"inference_mode": true,
|
checkpoint-250/adapter_model/adapter_config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"base_model_name_or_path": "
|
3 |
"bias": "none",
|
4 |
"fan_in_fan_out": false,
|
5 |
"inference_mode": true,
|
|
|
1 |
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
|
3 |
"bias": "none",
|
4 |
"fan_in_fan_out": false,
|
5 |
"inference_mode": true,
|
checkpoint-250/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 12833221
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18df2cac8be356fd743f7f148eac89cf48c49fde41613d6cdc7042895f352262
|
3 |
size 12833221
|
checkpoint-250/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:330cf56f2e599b855551dcdd55971e6a3e8f1307545a22d5cb7f7293df0819e5
|
3 |
size 21687
|
checkpoint-250/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b0cf5557ac285d3d79e9d600b9fd06dde75ee2bf85fa4db0ad583e366008d7c
|
3 |
size 21687
|
checkpoint-250/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a4429f4e4a59e91a65cb6a362812d7e46469edc3b78a25cd70d8eac9bfbbf06
|
3 |
size 21687
|
checkpoint-250/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8a32378fcb04fef52698977561e73144e99710d3b8452f34177898cdc23a49a
|
3 |
size 21687
|
checkpoint-250/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0856a28333d12cacc5ca57b0804c61519edfcd0d56bde275f51666db53213dda
|
3 |
size 21687
|
checkpoint-250/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd2dac20dcb135df8bb4436d649668c7186f2292a69b2901cee7738ba43e7d37
|
3 |
size 21687
|
checkpoint-250/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21687
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b61b06643f500ee5089a12fd509ef9e70102eed9103b4fae7be9b97f6c37310
|
3 |
size 21687
|
checkpoint-250/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d24cd8562a35a915591169933f9382c8f93e517748969707de31a97e6966304
|
3 |
size 557
|
checkpoint-250/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d0977bcf65f9d2bcc9279899f3c18eb0a26987f9bdd682bc3d621f8a1bb352c
|
3 |
size 627
|
checkpoint-250/trainer_state.json
CHANGED
@@ -10,79 +10,79 @@
|
|
10 |
{
|
11 |
"epoch": 0.01,
|
12 |
"learning_rate": 9.949107209404665e-05,
|
13 |
-
"loss": 1.
|
14 |
"step": 20
|
15 |
},
|
16 |
{
|
17 |
"epoch": 0.01,
|
18 |
-
"learning_rate": 9.
|
19 |
-
"loss":
|
20 |
"step": 40
|
21 |
},
|
22 |
{
|
23 |
"epoch": 0.02,
|
24 |
-
"learning_rate": 8.
|
25 |
-
"loss": 0.
|
26 |
"step": 60
|
27 |
},
|
28 |
{
|
29 |
"epoch": 0.02,
|
30 |
-
"learning_rate": 8.
|
31 |
-
"loss": 0.
|
32 |
"step": 80
|
33 |
},
|
34 |
{
|
35 |
"epoch": 0.03,
|
36 |
-
"learning_rate": 7.
|
37 |
-
"loss": 0.
|
38 |
"step": 100
|
39 |
},
|
40 |
{
|
41 |
"epoch": 0.04,
|
42 |
-
"learning_rate":
|
43 |
-
"loss":
|
44 |
"step": 120
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.04,
|
48 |
-
"learning_rate":
|
49 |
-
"loss":
|
50 |
"step": 140
|
51 |
},
|
52 |
{
|
53 |
"epoch": 0.05,
|
54 |
-
"learning_rate":
|
55 |
-
"loss":
|
56 |
"step": 160
|
57 |
},
|
58 |
{
|
59 |
"epoch": 0.06,
|
60 |
-
"learning_rate":
|
61 |
-
"loss":
|
62 |
"step": 180
|
63 |
},
|
64 |
{
|
65 |
"epoch": 0.06,
|
66 |
-
"learning_rate":
|
67 |
-
"loss": 0.
|
68 |
"step": 200
|
69 |
},
|
70 |
{
|
71 |
"epoch": 0.07,
|
72 |
-
"learning_rate":
|
73 |
-
"loss":
|
74 |
"step": 220
|
75 |
},
|
76 |
{
|
77 |
"epoch": 0.07,
|
78 |
-
"learning_rate": 7.
|
79 |
-
"loss":
|
80 |
"step": 240
|
81 |
}
|
82 |
],
|
83 |
"max_steps": 250,
|
84 |
"num_train_epochs": 1,
|
85 |
-
"total_flos": 3.
|
86 |
"trial_name": null,
|
87 |
"trial_params": null
|
88 |
}
|
|
|
10 |
{
|
11 |
"epoch": 0.01,
|
12 |
"learning_rate": 9.949107209404665e-05,
|
13 |
+
"loss": 1.6265,
|
14 |
"step": 20
|
15 |
},
|
16 |
{
|
17 |
"epoch": 0.01,
|
18 |
+
"learning_rate": 9.62558321769342e-05,
|
19 |
+
"loss": 1.0084,
|
20 |
"step": 40
|
21 |
},
|
22 |
{
|
23 |
"epoch": 0.02,
|
24 |
+
"learning_rate": 8.98317278228618e-05,
|
25 |
+
"loss": 0.8582,
|
26 |
"step": 60
|
27 |
},
|
28 |
{
|
29 |
"epoch": 0.02,
|
30 |
+
"learning_rate": 8.124693337340092e-05,
|
31 |
+
"loss": 0.7713,
|
32 |
"step": 80
|
33 |
},
|
34 |
{
|
35 |
"epoch": 0.03,
|
36 |
+
"learning_rate": 7.017858456614284e-05,
|
37 |
+
"loss": 0.7922,
|
38 |
"step": 100
|
39 |
},
|
40 |
{
|
41 |
"epoch": 0.04,
|
42 |
+
"learning_rate": 5.775760687679603e-05,
|
43 |
+
"loss": 0.7959,
|
44 |
"step": 120
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.04,
|
48 |
+
"learning_rate": 4.481661436267057e-05,
|
49 |
+
"loss": 0.7505,
|
50 |
"step": 140
|
51 |
},
|
52 |
{
|
53 |
"epoch": 0.05,
|
54 |
+
"learning_rate": 3.222307917871977e-05,
|
55 |
+
"loss": 0.7691,
|
56 |
"step": 160
|
57 |
},
|
58 |
{
|
59 |
"epoch": 0.06,
|
60 |
+
"learning_rate": 2.0821182410171636e-05,
|
61 |
+
"loss": 0.7603,
|
62 |
"step": 180
|
63 |
},
|
64 |
{
|
65 |
"epoch": 0.06,
|
66 |
+
"learning_rate": 1.1375226172924341e-05,
|
67 |
+
"loss": 0.7335,
|
68 |
"step": 200
|
69 |
},
|
70 |
{
|
71 |
"epoch": 0.07,
|
72 |
+
"learning_rate": 4.5184002322740785e-06,
|
73 |
+
"loss": 0.7604,
|
74 |
"step": 220
|
75 |
},
|
76 |
{
|
77 |
"epoch": 0.07,
|
78 |
+
"learning_rate": 7.103374616951042e-07,
|
79 |
+
"loss": 0.752,
|
80 |
"step": 240
|
81 |
}
|
82 |
],
|
83 |
"max_steps": 250,
|
84 |
"num_train_epochs": 1,
|
85 |
+
"total_flos": 3.758661118512333e+16,
|
86 |
"trial_name": null,
|
87 |
"trial_params": null
|
88 |
}
|
checkpoint-250/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3963
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d560ae8d66a2c130d334196f891fd2eefc6358d4efae9e7bcba573d5cfbd0d7f
|
3 |
size 3963
|
flyte_training_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"base_model": "
|
|
|
1 |
+
{"base_model": "meta-llama/Llama-2-7b-hf", "data_path": "yahma/alpaca-cleaned", "instruction_key": "instruction", "input_key": "input", "output_key": "output", "output_dir": "./output", "device_map": "auto", "batch_size": 16, "micro_batch_size": 1, "num_epochs": 1, "max_steps": 250, "eval_steps": 200, "save_steps": 50, "learning_rate": 0.0001, "cutoff_len": 512, "val_set_size": 0, "lora_r": 8, "lora_alpha": 16, "lora_dropout": 0.05, "weight_decay": 0.02, "warmup_ratio": 0.03, "lr_scheduler_type": "cosine", "lora_target_modules": ["q_proj", "k_proj", "v_proj"], "train_on_inputs": true, "add_eos_token": true, "group_by_length": true, "resume_from_checkpoint": null, "wandb_project": "unionai-llm-fine-tuning", "wandb_run_name": "", "wandb_watch": "", "wandb_log_model": "", "debug_mode": false, "debug_train_data_size": 1024}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 25234701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:080ac13d6517a805608f5c76bf507efd41ba6128b6f62604e11e40649b67266e
|
3 |
size 25234701
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3963
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d560ae8d66a2c130d334196f891fd2eefc6358d4efae9e7bcba573d5cfbd0d7f
|
3 |
size 3963
|