martimfasantos committed
Commit ea4ba97
1 Parent(s): 149e77a

Model save
README.md CHANGED
@@ -4,6 +4,7 @@ library_name: peft
 tags:
 - trl
 - dpo
+- alignment-handbook
 - generated_from_trainer
 base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
 model-index:
@@ -18,15 +19,15 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Logits/chosen: -3.0239
+- Logits/rejected: -3.0176
+- Logps/chosen: -166.7881
+- Logps/rejected: -187.0472
 - Loss: 0.6482
-- Rewards/chosen: -0.9538
-- Rewards/rejected: -1.1194
 - Rewards/accuracies: 0.6171
+- Rewards/chosen: -0.9538
 - Rewards/margins: 0.1656
-- Logps/rejected: -187.0472
-- Logps/chosen: -166.7881
-- Logits/rejected: -3.0176
-- Logits/chosen: -3.0239
+- Rewards/rejected: -1.1194
 
 ## Model description
 
@@ -93,32 +94,32 @@ The following hyperparameters were used during training:
 | 0.6252 | 0.52 | 3000 | -3.0824 | -3.0782 | -148.4267 | -166.3868 | 0.6505 | 0.6055 | -0.7702 | 0.1426 | -0.9128 |
 | 0.6082 | 0.53 | 3100 | -3.0723 | -3.0678 | -149.2047 | -167.4548 | 0.6500 | 0.6115 | -0.7779 | 0.1455 | -0.9235 |
 | 0.6072 | 0.55 | 3200 | -3.0863 | -3.0819 | -147.0810 | -164.9669 | 0.6499 | 0.6090 | -0.7567 | 0.1419 | -0.8986 |
-| 0.6142 | 0.57 | 3300 | 0.6468 | -1.0786 | -1.2549 | 0.6176 | 0.1764 | -200.5992 | -179.2665 | -3.0026 | -3.0087 |
-| 0.602 | 0.59 | 3400 | 0.6504 | -0.7890 | -0.9330 | 0.6136 | 0.1440 | -168.4087 | -150.3082 | -3.0624 | -3.0674 |
-| 0.605 | 0.6 | 3500 | 0.6497 | -0.8277 | -0.9780 | 0.6122 | 0.1503 | -172.9109 | -154.1790 | -3.0538 | -3.0590 |
-| 0.6263 | 0.62 | 3600 | 0.6508 | -0.7857 | -0.9297 | 0.6043 | 0.1440 | -168.0735 | -149.9757 | -3.0672 | -3.0721 |
-| 0.5961 | 0.64 | 3700 | 0.6492 | -0.9805 | -1.1426 | 0.6136 | 0.1622 | -189.3689 | -169.4567 | -3.0090 | -3.0151 |
-| 0.6273 | 0.65 | 3800 | 0.6494 | -0.9657 | -1.1255 | 0.6141 | 0.1598 | -187.6573 | -167.9805 | -3.0057 | -3.0117 |
-| 0.6183 | 0.67 | 3900 | 0.6488 | -0.9603 | -1.1217 | 0.6166 | 0.1613 | -187.2734 | -167.4417 | -3.0077 | -3.0137 |
-| 0.6051 | 0.69 | 4000 | 0.6482 | -1.0496 | -1.2202 | 0.6178 | 0.1705 | -197.1255 | -176.3739 | -2.9908 | -2.9974 |
-| 0.5867 | 0.71 | 4100 | 0.6484 | -0.9770 | -1.1429 | 0.6125 | 0.1659 | -189.3998 | -169.1084 | -3.0088 | -3.0151 |
-| 0.6554 | 0.72 | 4200 | 0.6489 | -0.9287 | -1.0891 | 0.6176 | 0.1604 | -184.0126 | -164.2755 | -3.0209 | -3.0270 |
-| 0.6053 | 0.74 | 4300 | 0.6489 | -0.8857 | -1.0434 | 0.6097 | 0.1577 | -179.4446 | -159.9774 | -3.0303 | -3.0362 |
-| 0.6153 | 0.76 | 4400 | 0.6489 | -0.8914 | -1.0502 | 0.6120 | 0.1588 | -180.1235 | -160.5470 | -3.0292 | -3.0351 |
-| 0.6145 | 0.78 | 4500 | 0.6490 | -0.8876 | -1.0457 | 0.6113 | 0.1580 | -179.6728 | -160.1720 | -3.0319 | -3.0378 |
-| 0.5798 | 0.79 | 4600 | 0.6488 | -0.9127 | -1.0736 | 0.6148 | 0.1609 | -182.4701 | -162.6813 | -3.0247 | -3.0308 |
-| 0.6218 | 0.81 | 4700 | 0.6486 | -0.9164 | -1.0784 | 0.6152 | 0.1620 | -182.9482 | -163.0493 | -3.0246 | -3.0307 |
-| 0.6102 | 0.83 | 4800 | 0.6484 | -0.9348 | -1.0987 | 0.6150 | 0.1639 | -184.9769 | -164.8939 | -3.0197 | -3.0259 |
-| 0.6176 | 0.84 | 4900 | 0.6483 | -0.9435 | -1.1084 | 0.6157 | 0.1649 | -185.9428 | -165.7554 | -3.0211 | -3.0273 |
-| 0.5907 | 0.86 | 5000 | 0.6482 | -0.9572 | -1.1236 | 0.6164 | 0.1664 | -187.4627 | -167.1301 | -3.0196 | -3.0259 |
-| 0.6534 | 0.88 | 5100 | 0.6481 | -0.9581 | -1.1246 | 0.6155 | 0.1665 | -187.5712 | -167.2241 | -3.0148 | -3.0211 |
-| 0.5973 | 0.9 | 5200 | 0.6483 | -0.9547 | -1.1206 | 0.6169 | 0.1659 | -187.1679 | -166.8823 | -3.0130 | -3.0194 |
-| 0.5975 | 0.91 | 5300 | 0.6482 | -0.9520 | -1.1177 | 0.6162 | 0.1657 | -186.8759 | -166.6118 | -3.0185 | -3.0248 |
-| 0.5986 | 0.93 | 5400 | 0.6483 | -0.9524 | -1.1179 | 0.6190 | 0.1655 | -186.8928 | -166.6502 | -3.0186 | -3.0249 |
-| 0.6025 | 0.95 | 5500 | 0.6483 | -0.9534 | -1.1189 | 0.6169 | 0.1655 | -186.9980 | -166.7467 | -3.0189 | -3.0252 |
-| 0.6149 | 0.96 | 5600 | 0.6480 | -0.9538 | -1.1201 | 0.6155 | 0.1663 | -187.1137 | -166.7859 | -3.0181 | -3.0244 |
-| 0.6275 | 0.98 | 5700 | 0.6482 | -0.9527 | -1.1184 | 0.6178 | 0.1657 | -186.9484 | -166.6791 | -3.0182 | -3.0245 |
-| 0.5876 | 1.0 | 5800 | 0.6482 | -0.9538 | -1.1194 | 0.6171 | 0.1656 | -187.0472 | -166.7881 | -3.0176 | -3.0239 |
+| 0.6142 | 0.57 | 3300 | -3.0087 | -3.0026 | -179.2665 | -200.5992 | 0.6468 | 0.6176 | -1.0786 | 0.1764 | -1.2549 |
+| 0.602 | 0.59 | 3400 | -3.0674 | -3.0624 | -150.3082 | -168.4087 | 0.6504 | 0.6136 | -0.7890 | 0.1440 | -0.9330 |
+| 0.605 | 0.6 | 3500 | -3.0590 | -3.0538 | -154.1790 | -172.9109 | 0.6497 | 0.6122 | -0.8277 | 0.1503 | -0.9780 |
+| 0.6263 | 0.62 | 3600 | -3.0721 | -3.0672 | -149.9757 | -168.0735 | 0.6508 | 0.6043 | -0.7857 | 0.1440 | -0.9297 |
+| 0.5961 | 0.64 | 3700 | -3.0151 | -3.0090 | -169.4567 | -189.3689 | 0.6492 | 0.6136 | -0.9805 | 0.1622 | -1.1426 |
+| 0.6273 | 0.65 | 3800 | -3.0117 | -3.0057 | -167.9805 | -187.6573 | 0.6494 | 0.6141 | -0.9657 | 0.1598 | -1.1255 |
+| 0.6183 | 0.67 | 3900 | -3.0137 | -3.0077 | -167.4417 | -187.2734 | 0.6488 | 0.6166 | -0.9603 | 0.1613 | -1.1217 |
+| 0.6051 | 0.69 | 4000 | -2.9974 | -2.9908 | -176.3739 | -197.1255 | 0.6482 | 0.6178 | -1.0496 | 0.1705 | -1.2202 |
+| 0.5867 | 0.71 | 4100 | -3.0151 | -3.0088 | -169.1084 | -189.3998 | 0.6484 | 0.6125 | -0.9770 | 0.1659 | -1.1429 |
+| 0.6554 | 0.72 | 4200 | -3.0270 | -3.0209 | -164.2755 | -184.0126 | 0.6489 | 0.6176 | -0.9287 | 0.1604 | -1.0891 |
+| 0.6053 | 0.74 | 4300 | -3.0362 | -3.0303 | -159.9774 | -179.4446 | 0.6489 | 0.6097 | -0.8857 | 0.1577 | -1.0434 |
+| 0.6153 | 0.76 | 4400 | -3.0351 | -3.0292 | -160.5470 | -180.1235 | 0.6489 | 0.6120 | -0.8914 | 0.1588 | -1.0502 |
+| 0.6145 | 0.78 | 4500 | -3.0378 | -3.0319 | -160.1720 | -179.6728 | 0.6490 | 0.6113 | -0.8876 | 0.1580 | -1.0457 |
+| 0.5798 | 0.79 | 4600 | -3.0308 | -3.0247 | -162.6813 | -182.4701 | 0.6488 | 0.6148 | -0.9127 | 0.1609 | -1.0736 |
+| 0.6218 | 0.81 | 4700 | -3.0307 | -3.0246 | -163.0493 | -182.9482 | 0.6486 | 0.6152 | -0.9164 | 0.1620 | -1.0784 |
+| 0.6102 | 0.83 | 4800 | -3.0259 | -3.0197 | -164.8939 | -184.9769 | 0.6484 | 0.6150 | -0.9348 | 0.1639 | -1.0987 |
+| 0.6176 | 0.84 | 4900 | -3.0273 | -3.0211 | -165.7554 | -185.9428 | 0.6483 | 0.6157 | -0.9435 | 0.1649 | -1.1084 |
+| 0.5907 | 0.86 | 5000 | -3.0259 | -3.0196 | -167.1301 | -187.4627 | 0.6482 | 0.6164 | -0.9572 | 0.1664 | -1.1236 |
+| 0.6534 | 0.88 | 5100 | -3.0211 | -3.0148 | -167.2241 | -187.5712 | 0.6481 | 0.6155 | -0.9581 | 0.1665 | -1.1246 |
+| 0.5973 | 0.9 | 5200 | -3.0194 | -3.0130 | -166.8823 | -187.1679 | 0.6483 | 0.6169 | -0.9547 | 0.1659 | -1.1206 |
+| 0.5975 | 0.91 | 5300 | -3.0248 | -3.0185 | -166.6118 | -186.8759 | 0.6482 | 0.6162 | -0.9520 | 0.1657 | -1.1177 |
+| 0.5986 | 0.93 | 5400 | -3.0249 | -3.0186 | -166.6502 | -186.8928 | 0.6483 | 0.6190 | -0.9524 | 0.1655 | -1.1179 |
+| 0.6025 | 0.95 | 5500 | -3.0252 | -3.0189 | -166.7467 | -186.9980 | 0.6483 | 0.6169 | -0.9534 | 0.1655 | -1.1189 |
+| 0.6149 | 0.96 | 5600 | -3.0244 | -3.0181 | -166.7859 | -187.1137 | 0.6480 | 0.6155 | -0.9538 | 0.1663 | -1.1201 |
+| 0.6275 | 0.98 | 5700 | -3.0245 | -3.0182 | -166.6791 | -186.9484 | 0.6482 | 0.6178 | -0.9527 | 0.1657 | -1.1184 |
+| 0.5876 | 1.0 | 5800 | -3.0239 | -3.0176 | -166.7881 | -187.0472 | 0.6482 | 0.6171 | -0.9538 | 0.1656 | -1.1194 |
 
 
 ### Framework versions
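The evaluation metrics are unchanged by this commit; the new README only lists them alphabetically, and the epoch 0.57-1.0 rows of the training-log table are rewritten in the same column order as the earlier rows (Logits/Logps columns first). As a quick arithmetic check of the reported rewards, assuming TRL's usual DPO convention that Rewards/margins is Rewards/chosen minus Rewards/rejected:

```python
# Sanity check of the reported DPO rewards; assumes TRL's convention
# margins = rewards_chosen - rewards_rejected.
rewards_chosen = -0.9538
rewards_rejected = -1.1194
print(round(rewards_chosen - rewards_rejected, 4))  # 0.1656, matches Rewards/margins
```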
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "down_proj",
-    "v_proj",
     "q_proj",
-    "gate_proj",
+    "k_proj",
+    "up_proj",
     "o_proj",
-    "k_proj"
+    "gate_proj",
+    "down_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
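This hunk only reorders target_modules; the adapter targets the same seven projection matrices before and after. For reference, a minimal sketch of building such an adapter config with peft; the r, lora_alpha, and lora_dropout values are illustrative placeholders, not values recorded in this commit:

```python
# Minimal sketch (peft). r / lora_alpha / lora_dropout are placeholder
# hyperparameters, NOT taken from this commit.
from peft import LoraConfig

peft_config = LoraConfig(
    r=16,              # placeholder rank
    lora_alpha=32,     # placeholder scaling
    lora_dropout=0.05, # placeholder dropout
    target_modules=[   # the module list from adapter_config.json
        "q_proj", "k_proj", "up_proj", "o_proj",
        "gate_proj", "down_proj", "v_proj",
    ],
    task_type="CAUSAL_LM",
)
# peft stores target_modules internally as a set, so the serialized order in
# adapter_config.json can differ between saves even when the list is unchanged,
# which is what this diff shows.
```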
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7320767e81a3f833497535872d812a619bb1fe953fc55f66fd53cbd7f462f9f
+oid sha256:89f07b867162674e17e76b10c0a3c2c0d12a2c5ae3970245cf2b32f61cbad484
 size 201892728
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.27507112530743316,
-    "train_runtime": 24370.3932,
+    "train_loss": 0.0003277428618961422,
+    "train_runtime": 17.7068,
     "train_samples": 92858,
-    "train_samples_per_second": 3.81,
-    "train_steps_per_second": 0.238
+    "train_samples_per_second": 5244.214,
+    "train_steps_per_second": 327.728
 }
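The per-second fields are derived from the other entries (and the runtime dropping from 24370 s to 17.7 s is consistent with a brief resumed run being re-saved rather than a full retrain). A quick consistency check, assuming the Trainer computes samples_per_second as train_samples / train_runtime and steps_per_second as steps / train_runtime, using the 5803 steps recorded in trainer_state.json; small deviations are expected because the stored runtime is rounded:

```python
# Consistency check for the updated all_results.json / train_results.json.
train_runtime = 17.7068  # seconds (rounded in the file)
train_samples = 92858
total_steps = 5803       # "step" recorded in trainer_state.json

print(train_samples / train_runtime)  # ≈ 5244.2 (reported: 5244.214)
print(total_steps / train_runtime)    # ≈ 327.73 (reported: 327.728)
```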
config.json ADDED
@@ -0,0 +1,43 @@
+{
+  "_name_or_path": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 5632,
+  "max_position_embeddings": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 22,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.39.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}
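The quantization_config block records how the base model was loaded during training: 4-bit NF4 quantization via bitsandbytes with bfloat16 compute and no double quantization. A minimal sketch of a matching load with transformers; this illustrates the recorded settings and is not a script taken from this repository:

```python
# Minimal sketch: load the base model with the 4-bit settings recorded in
# config.json above (not a script from this repo).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)
```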
runs/May10_09-48-15_poseidon/events.out.tfevents.1715359378.poseidon.2727122.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8117149c39a95dd6b660eb33fb1db45870474c361678692b942d7f8a07b75709
+size 828
runs/May10_17-24-35_poseidon/events.out.tfevents.1715361893.poseidon.2825808.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:334dae2d66def40d74583388cdcee54df57056687fc329ebc9be15efc5c33e8f
+size 5729
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.27507112530743316,
-    "train_runtime": 24370.3932,
+    "train_loss": 0.0003277428618961422,
+    "train_runtime": 17.7068,
     "train_samples": 92858,
-    "train_samples_per_second": 3.81,
-    "train_steps_per_second": 0.238
+    "train_samples_per_second": 5244.214,
+    "train_steps_per_second": 327.728
 }
trainer_state.json CHANGED
@@ -9655,10 +9655,10 @@
       "epoch": 1.0,
       "step": 5803,
       "total_flos": 0.0,
-      "train_loss": 0.27507112530743316,
-      "train_runtime": 24370.3932,
-      "train_samples_per_second": 3.81,
-      "train_steps_per_second": 0.238
+      "train_loss": 0.0003277428618961422,
+      "train_runtime": 17.7068,
+      "train_samples_per_second": 5244.214,
+      "train_steps_per_second": 327.728
     }
   ],
   "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e94b06a057977aeba4819f0c835f5a2c5c155d7cddc162ec1114873d1f3f45d
+oid sha256:0f6ffbe0f0ac24f76d3c2accb18e55595b98097608ade3729535f8d635c67be2
 size 5112