martimfasantos
committed on
Commit
•
ea4ba97
1
Parent(s):
149e77a
Model save
Browse files
- README.md +33 -32
- adapter_config.json +5 -5
- adapter_model.safetensors +1 -1
- all_results.json +4 -4
- config.json +43 -0
- runs/May10_09-48-15_poseidon/events.out.tfevents.1715359378.poseidon.2727122.1 +3 -0
- runs/May10_17-24-35_poseidon/events.out.tfevents.1715361893.poseidon.2825808.0 +3 -0
- train_results.json +4 -4
- trainer_state.json +4 -4
- training_args.bin +1 -1
README.md
CHANGED
@@ -4,6 +4,7 @@ library_name: peft
|
|
4 |
tags:
|
5 |
- trl
|
6 |
- dpo
|
|
|
7 |
- generated_from_trainer
|
8 |
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
9 |
model-index:
|
@@ -18,15 +19,15 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
|
20 |
It achieves the following results on the evaluation set:
|
|
|
|
|
|
|
|
|
21 |
- Loss: 0.6482
|
22 |
-
- Rewards/chosen: -0.9538
|
23 |
-
- Rewards/rejected: -1.1194
|
24 |
- Rewards/accuracies: 0.6171
|
|
|
25 |
- Rewards/margins: 0.1656
|
26 |
-
-
|
27 |
-
- Logps/chosen: -166.7881
|
28 |
-
- Logits/rejected: -3.0176
|
29 |
-
- Logits/chosen: -3.0239
|
30 |
|
31 |
## Model description
|
32 |
|
@@ -93,32 +94,32 @@ The following hyperparameters were used during training:
|
|
93 |
| 0.6252 | 0.52 | 3000 | -3.0824 | -3.0782 | -148.4267 | -166.3868 | 0.6505 | 0.6055 | -0.7702 | 0.1426 | -0.9128 |
|
94 |
| 0.6082 | 0.53 | 3100 | -3.0723 | -3.0678 | -149.2047 | -167.4548 | 0.6500 | 0.6115 | -0.7779 | 0.1455 | -0.9235 |
|
95 |
| 0.6072 | 0.55 | 3200 | -3.0863 | -3.0819 | -147.0810 | -164.9669 | 0.6499 | 0.6090 | -0.7567 | 0.1419 | -0.8986 |
|
96 |
-
| 0.6142 | 0.57 | 3300 |
|
97 |
-
| 0.602 | 0.59 | 3400 |
|
98 |
-
| 0.605 | 0.6 | 3500 |
|
99 |
-
| 0.6263 | 0.62 | 3600 |
|
100 |
-
| 0.5961 | 0.64 | 3700 |
|
101 |
-
| 0.6273 | 0.65 | 3800 |
|
102 |
-
| 0.6183 | 0.67 | 3900 |
|
103 |
-
| 0.6051 | 0.69 | 4000 |
|
104 |
-
| 0.5867 | 0.71 | 4100 |
|
105 |
-
| 0.6554 | 0.72 | 4200 |
|
106 |
-
| 0.6053 | 0.74 | 4300 |
|
107 |
-
| 0.6153 | 0.76 | 4400 |
|
108 |
-
| 0.6145 | 0.78 | 4500 |
|
109 |
-
| 0.5798 | 0.79 | 4600 |
|
110 |
-
| 0.6218 | 0.81 | 4700 |
|
111 |
-
| 0.6102 | 0.83 | 4800 |
|
112 |
-
| 0.6176 | 0.84 | 4900 |
|
113 |
-
| 0.5907 | 0.86 | 5000 |
|
114 |
-
| 0.6534 | 0.88 | 5100 |
|
115 |
-
| 0.5973 | 0.9 | 5200 |
|
116 |
-
| 0.5975 | 0.91 | 5300 |
|
117 |
-
| 0.5986 | 0.93 | 5400 |
|
118 |
-
| 0.6025 | 0.95 | 5500 |
|
119 |
-
| 0.6149 | 0.96 | 5600 |
|
120 |
-
| 0.6275 | 0.98 | 5700 |
|
121 |
-
| 0.5876 | 1.0 | 5800 |
|
122 |
|
123 |
|
124 |
### Framework versions
|
|
|
4 |
tags:
|
5 |
- trl
|
6 |
- dpo
|
7 |
+
- alignment-handbook
|
8 |
- generated_from_trainer
|
9 |
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
10 |
model-index:
|
|
|
19 |
|
20 |
This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
|
21 |
It achieves the following results on the evaluation set:
|
22 |
+
- Logits/chosen: -3.0239
|
23 |
+
- Logits/rejected: -3.0176
|
24 |
+
- Logps/chosen: -166.7881
|
25 |
+
- Logps/rejected: -187.0472
|
26 |
- Loss: 0.6482
|
|
|
|
|
27 |
- Rewards/accuracies: 0.6171
|
28 |
+
- Rewards/chosen: -0.9538
|
29 |
- Rewards/margins: 0.1656
|
30 |
+
- Rewards/rejected: -1.1194
|
|
|
|
|
|
|
31 |
|
32 |
## Model description
|
33 |
|
|
|
94 |
| 0.6252 | 0.52 | 3000 | -3.0824 | -3.0782 | -148.4267 | -166.3868 | 0.6505 | 0.6055 | -0.7702 | 0.1426 | -0.9128 |
|
95 |
| 0.6082 | 0.53 | 3100 | -3.0723 | -3.0678 | -149.2047 | -167.4548 | 0.6500 | 0.6115 | -0.7779 | 0.1455 | -0.9235 |
|
96 |
| 0.6072 | 0.55 | 3200 | -3.0863 | -3.0819 | -147.0810 | -164.9669 | 0.6499 | 0.6090 | -0.7567 | 0.1419 | -0.8986 |
|
97 |
+
| 0.6142 | 0.57 | 3300 | -3.0087 | -3.0026 | -179.2665 | -200.5992 | 0.6468 | 0.6176 | -1.0786 | 0.1764 | -1.2549 |
|
98 |
+
| 0.602 | 0.59 | 3400 | -3.0674 | -3.0624 | -150.3082 | -168.4087 | 0.6504 | 0.6136 | -0.7890 | 0.1440 | -0.9330 |
|
99 |
+
| 0.605 | 0.6 | 3500 | -3.0590 | -3.0538 | -154.1790 | -172.9109 | 0.6497 | 0.6122 | -0.8277 | 0.1503 | -0.9780 |
|
100 |
+
| 0.6263 | 0.62 | 3600 | -3.0721 | -3.0672 | -149.9757 | -168.0735 | 0.6508 | 0.6043 | -0.7857 | 0.1440 | -0.9297 |
|
101 |
+
| 0.5961 | 0.64 | 3700 | -3.0151 | -3.0090 | -169.4567 | -189.3689 | 0.6492 | 0.6136 | -0.9805 | 0.1622 | -1.1426 |
|
102 |
+
| 0.6273 | 0.65 | 3800 | -3.0117 | -3.0057 | -167.9805 | -187.6573 | 0.6494 | 0.6141 | -0.9657 | 0.1598 | -1.1255 |
|
103 |
+
| 0.6183 | 0.67 | 3900 | -3.0137 | -3.0077 | -167.4417 | -187.2734 | 0.6488 | 0.6166 | -0.9603 | 0.1613 | -1.1217 |
|
104 |
+
| 0.6051 | 0.69 | 4000 | -2.9974 | -2.9908 | -176.3739 | -197.1255 | 0.6482 | 0.6178 | -1.0496 | 0.1705 | -1.2202 |
|
105 |
+
| 0.5867 | 0.71 | 4100 | -3.0151 | -3.0088 | -169.1084 | -189.3998 | 0.6484 | 0.6125 | -0.9770 | 0.1659 | -1.1429 |
|
106 |
+
| 0.6554 | 0.72 | 4200 | -3.0270 | -3.0209 | -164.2755 | -184.0126 | 0.6489 | 0.6176 | -0.9287 | 0.1604 | -1.0891 |
|
107 |
+
| 0.6053 | 0.74 | 4300 | -3.0362 | -3.0303 | -159.9774 | -179.4446 | 0.6489 | 0.6097 | -0.8857 | 0.1577 | -1.0434 |
|
108 |
+
| 0.6153 | 0.76 | 4400 | -3.0351 | -3.0292 | -160.5470 | -180.1235 | 0.6489 | 0.6120 | -0.8914 | 0.1588 | -1.0502 |
|
109 |
+
| 0.6145 | 0.78 | 4500 | -3.0378 | -3.0319 | -160.1720 | -179.6728 | 0.6490 | 0.6113 | -0.8876 | 0.1580 | -1.0457 |
|
110 |
+
| 0.5798 | 0.79 | 4600 | -3.0308 | -3.0247 | -162.6813 | -182.4701 | 0.6488 | 0.6148 | -0.9127 | 0.1609 | -1.0736 |
|
111 |
+
| 0.6218 | 0.81 | 4700 | -3.0307 | -3.0246 | -163.0493 | -182.9482 | 0.6486 | 0.6152 | -0.9164 | 0.1620 | -1.0784 |
|
112 |
+
| 0.6102 | 0.83 | 4800 | -3.0259 | -3.0197 | -164.8939 | -184.9769 | 0.6484 | 0.6150 | -0.9348 | 0.1639 | -1.0987 |
|
113 |
+
| 0.6176 | 0.84 | 4900 | -3.0273 | -3.0211 | -165.7554 | -185.9428 | 0.6483 | 0.6157 | -0.9435 | 0.1649 | -1.1084 |
|
114 |
+
| 0.5907 | 0.86 | 5000 | -3.0259 | -3.0196 | -167.1301 | -187.4627 | 0.6482 | 0.6164 | -0.9572 | 0.1664 | -1.1236 |
|
115 |
+
| 0.6534 | 0.88 | 5100 | -3.0211 | -3.0148 | -167.2241 | -187.5712 | 0.6481 | 0.6155 | -0.9581 | 0.1665 | -1.1246 |
|
116 |
+
| 0.5973 | 0.9 | 5200 | -3.0194 | -3.0130 | -166.8823 | -187.1679 | 0.6483 | 0.6169 | -0.9547 | 0.1659 | -1.1206 |
|
117 |
+
| 0.5975 | 0.91 | 5300 | -3.0248 | -3.0185 | -166.6118 | -186.8759 | 0.6482 | 0.6162 | -0.9520 | 0.1657 | -1.1177 |
|
118 |
+
| 0.5986 | 0.93 | 5400 | -3.0249 | -3.0186 | -166.6502 | -186.8928 | 0.6483 | 0.6190 | -0.9524 | 0.1655 | -1.1179 |
|
119 |
+
| 0.6025 | 0.95 | 5500 | -3.0252 | -3.0189 | -166.7467 | -186.9980 | 0.6483 | 0.6169 | -0.9534 | 0.1655 | -1.1189 |
|
120 |
+
| 0.6149 | 0.96 | 5600 | -3.0244 | -3.0181 | -166.7859 | -187.1137 | 0.6480 | 0.6155 | -0.9538 | 0.1663 | -1.1201 |
|
121 |
+
| 0.6275 | 0.98 | 5700 | -3.0245 | -3.0182 | -166.6791 | -186.9484 | 0.6482 | 0.6178 | -0.9527 | 0.1657 | -1.1184 |
|
122 |
+
| 0.5876 | 1.0 | 5800 | -3.0239 | -3.0176 | -166.7881 | -187.0472 | 0.6482 | 0.6171 | -0.9538 | 0.1656 | -1.1194 |
|
123 |
|
124 |
|
125 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -19,13 +19,13 @@
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"up_proj",
|
23 |
-
"down_proj",
|
24 |
-
"v_proj",
|
25 |
"q_proj",
|
26 |
-
"
|
|
|
27 |
"o_proj",
|
28 |
-
"
|
|
|
|
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
|
|
|
|
|
|
22 |
"q_proj",
|
23 |
+
"k_proj",
|
24 |
+
"up_proj",
|
25 |
"o_proj",
|
26 |
+
"gate_proj",
|
27 |
+
"down_proj",
|
28 |
+
"v_proj"
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201892728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89f07b867162674e17e76b10c0a3c2c0d12a2c5ae3970245cf2b32f61cbad484
|
3 |
size 201892728
|
all_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 92858,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second":
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.0003277428618961422,
|
4 |
+
"train_runtime": 17.7068,
|
5 |
"train_samples": 92858,
|
6 |
+
"train_samples_per_second": 5244.214,
|
7 |
+
"train_steps_per_second": 327.728
|
8 |
}
|
config.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 2048,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 5632,
|
14 |
+
"max_position_embeddings": 2048,
|
15 |
+
"model_type": "llama",
|
16 |
+
"num_attention_heads": 32,
|
17 |
+
"num_hidden_layers": 22,
|
18 |
+
"num_key_value_heads": 4,
|
19 |
+
"pretraining_tp": 1,
|
20 |
+
"quantization_config": {
|
21 |
+
"_load_in_4bit": true,
|
22 |
+
"_load_in_8bit": false,
|
23 |
+
"bnb_4bit_compute_dtype": "bfloat16",
|
24 |
+
"bnb_4bit_quant_storage": "uint8",
|
25 |
+
"bnb_4bit_quant_type": "nf4",
|
26 |
+
"bnb_4bit_use_double_quant": false,
|
27 |
+
"llm_int8_enable_fp32_cpu_offload": false,
|
28 |
+
"llm_int8_has_fp16_weight": false,
|
29 |
+
"llm_int8_skip_modules": null,
|
30 |
+
"llm_int8_threshold": 6.0,
|
31 |
+
"load_in_4bit": true,
|
32 |
+
"load_in_8bit": false,
|
33 |
+
"quant_method": "bitsandbytes"
|
34 |
+
},
|
35 |
+
"rms_norm_eps": 1e-05,
|
36 |
+
"rope_scaling": null,
|
37 |
+
"rope_theta": 10000.0,
|
38 |
+
"tie_word_embeddings": false,
|
39 |
+
"torch_dtype": "bfloat16",
|
40 |
+
"transformers_version": "4.39.3",
|
41 |
+
"use_cache": true,
|
42 |
+
"vocab_size": 32000
|
43 |
+
}
|
runs/May10_09-48-15_poseidon/events.out.tfevents.1715359378.poseidon.2727122.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8117149c39a95dd6b660eb33fb1db45870474c361678692b942d7f8a07b75709
|
3 |
+
size 828
|
runs/May10_17-24-35_poseidon/events.out.tfevents.1715361893.poseidon.2825808.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:334dae2d66def40d74583388cdcee54df57056687fc329ebc9be15efc5c33e8f
|
3 |
+
size 5729
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 92858,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second":
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.0003277428618961422,
|
4 |
+
"train_runtime": 17.7068,
|
5 |
"train_samples": 92858,
|
6 |
+
"train_samples_per_second": 5244.214,
|
7 |
+
"train_steps_per_second": 327.728
|
8 |
}
|
trainer_state.json
CHANGED
@@ -9655,10 +9655,10 @@
|
|
9655 |
"epoch": 1.0,
|
9656 |
"step": 5803,
|
9657 |
"total_flos": 0.0,
|
9658 |
-
"train_loss": 0.
|
9659 |
-
"train_runtime":
|
9660 |
-
"train_samples_per_second":
|
9661 |
-
"train_steps_per_second":
|
9662 |
}
|
9663 |
],
|
9664 |
"logging_steps": 10,
|
|
|
9655 |
"epoch": 1.0,
|
9656 |
"step": 5803,
|
9657 |
"total_flos": 0.0,
|
9658 |
+
"train_loss": 0.0003277428618961422,
|
9659 |
+
"train_runtime": 17.7068,
|
9660 |
+
"train_samples_per_second": 5244.214,
|
9661 |
+
"train_steps_per_second": 327.728
|
9662 |
}
|
9663 |
],
|
9664 |
"logging_steps": 10,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f6ffbe0f0ac24f76d3c2accb18e55595b98097608ade3729535f8d635c67be2
|
3 |
size 5112
|