RedaAlami committed on
Commit 2e2cf85
1 Parent(s): 36177d4

Model save

Files changed (4):
  1. README.md +41 -44
  2. all_results.json +4 -17
  3. train_results.json +4 -4
  4. trainer_state.json +0 -0
README.md CHANGED
@@ -1,10 +1,7 @@
  ---
  base_model: TII-Frontier-Team/falcon3-3b-instruct
- datasets:
- - TII-Frontier-Team/Reasoning_DPO
  library_name: peft
  tags:
- - alignment-handbook
  - trl
  - dpo
  - generated_from_trainer
@@ -18,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->

  # zephyr-7b-dpo-qlora

- This model is a fine-tuned version of [TII-Frontier-Team/PEFT-falcon3b-it-gsm8k](https://huggingface.co/TII-Frontier-Team/PEFT-falcon3b-it-gsm8k) on the TII-Frontier-Team/Reasoning_DPO dataset.
+ This model is a fine-tuned version of [TII-Frontier-Team/falcon3-3b-instruct](https://huggingface.co/TII-Frontier-Team/falcon3-3b-instruct) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.0286
- - Rewards/chosen: -4.7078
- - Rewards/rejected: -10.6652
- - Rewards/accuracies: 0.9254
- - Rewards/margins: 5.9575
- - Logps/rejected: -1102.4209
- - Logps/chosen: -503.5470
- - Logits/rejected: 1.9412
- - Logits/chosen: 2.1408
+ - Loss: 0.0299
+ - Rewards/chosen: -4.6289
+ - Rewards/rejected: -10.4404
+ - Rewards/accuracies: 0.9302
+ - Rewards/margins: 5.8116
+ - Logps/rejected: -1079.9603
+ - Logps/chosen: -495.6860
+ - Logits/rejected: 2.0537
+ - Logits/chosen: 2.2623

  ## Model description

@@ -65,37 +62,37 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
- | 0.6914 | 0.0315 | 100 | 0.6912 | 0.0006 | -0.0036 | 0.6340 | 0.0042 | -36.2582 | -32.7125 | -1.6841 | -1.6367 |
- | 0.6743 | 0.0629 | 200 | 0.6753 | -0.0009 | -0.0462 | 0.6321 | 0.0454 | -40.5232 | -32.8573 | -1.5154 | -1.4649 |
- | 0.6112 | 0.0944 | 300 | 0.5905 | -0.5010 | -0.8365 | 0.6631 | 0.3356 | -119.5518 | -82.8670 | -0.5166 | -0.4325 |
- | 0.4477 | 0.1258 | 400 | 0.4026 | -1.9267 | -3.0850 | 0.7201 | 1.1583 | -344.3972 | -225.4428 | -0.5023 | -0.3494 |
- | 0.3583 | 0.1573 | 500 | 0.3063 | -2.4869 | -4.1367 | 0.7646 | 1.6498 | -449.5698 | -281.4605 | 0.3124 | 0.4717 |
- | 0.3041 | 0.1887 | 600 | 0.2405 | -2.9070 | -4.9732 | 0.7918 | 2.0662 | -533.2189 | -323.4665 | 0.9644 | 1.1113 |
- | 0.2487 | 0.2202 | 700 | 0.1964 | -3.4123 | -5.8172 | 0.8209 | 2.4050 | -617.6231 | -373.9985 | 1.1343 | 1.2933 |
- | 0.218 | 0.2517 | 800 | 0.1547 | -3.6771 | -6.6251 | 0.8336 | 2.9480 | -698.4094 | -400.4795 | 1.5710 | 1.7290 |
- | 0.1858 | 0.2831 | 900 | 0.1394 | -3.5484 | -6.6808 | 0.8485 | 3.1324 | -703.9799 | -387.6123 | 1.6988 | 1.8631 |
- | 0.173 | 0.3146 | 1000 | 0.1176 | -3.4824 | -6.7705 | 0.8649 | 3.2881 | -712.9531 | -381.0118 | 1.8190 | 1.9776 |
- | 0.1494 | 0.3460 | 1100 | 0.0979 | -3.7942 | -7.4529 | 0.8713 | 3.6587 | -781.1857 | -412.1861 | 1.8179 | 1.9865 |
- | 0.149 | 0.3775 | 1200 | 0.0817 | -4.1856 | -8.2504 | 0.8843 | 4.0648 | -860.9355 | -451.3316 | 1.8715 | 2.0581 |
- | 0.1143 | 0.4089 | 1300 | 0.0702 | -4.2444 | -8.6154 | 0.8884 | 4.3710 | -897.4431 | -457.2141 | 1.7765 | 1.9770 |
- | 0.1204 | 0.4404 | 1400 | 0.0642 | -4.1442 | -8.6112 | 0.8966 | 4.4670 | -897.0154 | -447.1863 | 2.1996 | 2.3734 |
- | 0.1013 | 0.4718 | 1500 | 0.0580 | -4.5031 | -9.1159 | 0.8951 | 4.6128 | -947.4904 | -483.0838 | 1.9514 | 2.1364 |
- | 0.1011 | 0.5033 | 1600 | 0.0567 | -4.0373 | -8.5779 | 0.9067 | 4.5406 | -893.6846 | -436.5011 | 1.9239 | 2.1103 |
- | 0.0853 | 0.5348 | 1700 | 0.0482 | -4.3119 | -9.2927 | 0.9067 | 4.9808 | -965.1708 | -463.9637 | 2.0648 | 2.2336 |
- | 0.0897 | 0.5662 | 1800 | 0.0449 | -4.3018 | -9.4275 | 0.9101 | 5.1257 | -978.6490 | -462.9552 | 1.9037 | 2.0822 |
- | 0.0717 | 0.5977 | 1900 | 0.0402 | -4.4391 | -9.8395 | 0.9112 | 5.4004 | -1019.8445 | -476.6779 | 2.0003 | 2.1749 |
- | 0.0487 | 0.6291 | 2000 | 0.0368 | -5.4728 | -11.3180 | 0.9078 | 5.8452 | -1167.6968 | -580.0486 | 1.9355 | 2.1422 |
- | 0.0683 | 0.6606 | 2100 | 0.0356 | -4.6736 | -10.2835 | 0.9190 | 5.6099 | -1064.2465 | -500.1268 | 2.0206 | 2.2058 |
- | 0.0514 | 0.6920 | 2200 | 0.0341 | -4.6025 | -10.2228 | 0.9209 | 5.6203 | -1058.1812 | -493.0187 | 1.9362 | 2.1272 |
- | 0.0623 | 0.7235 | 2300 | 0.0326 | -4.9398 | -10.7061 | 0.9213 | 5.7663 | -1106.5096 | -526.7491 | 1.8240 | 2.0327 |
- | 0.0693 | 0.7550 | 2400 | 0.0313 | -4.8024 | -10.6310 | 0.9231 | 5.8286 | -1098.9999 | -513.0095 | 1.8580 | 2.0583 |
- | 0.0543 | 0.7864 | 2500 | 0.0303 | -4.8132 | -10.7352 | 0.9228 | 5.9221 | -1109.4199 | -514.0873 | 1.9534 | 2.1471 |
- | 0.0555 | 0.8179 | 2600 | 0.0301 | -4.7251 | -10.5626 | 0.9261 | 5.8375 | -1092.1620 | -505.2810 | 1.9398 | 2.1357 |
- | 0.0646 | 0.8493 | 2700 | 0.0294 | -4.6930 | -10.6307 | 0.9261 | 5.9377 | -1098.9694 | -502.0694 | 2.0003 | 2.1947 |
- | 0.0546 | 0.8808 | 2800 | 0.0287 | -4.8085 | -10.8169 | 0.9250 | 6.0084 | -1117.5887 | -513.6258 | 1.9596 | 2.1607 |
- | 0.0702 | 0.9122 | 2900 | 0.0288 | -4.6970 | -10.6904 | 0.9243 | 5.9934 | -1104.9371 | -502.4718 | 1.9696 | 2.1647 |
- | 0.0623 | 0.9437 | 3000 | 0.0286 | -4.7098 | -10.6743 | 0.9269 | 5.9645 | -1103.3302 | -503.7507 | 1.9440 | 2.1437 |
- | 0.0593 | 0.9751 | 3100 | 0.0287 | -4.6985 | -10.6531 | 0.9276 | 5.9547 | -1101.2122 | -502.6163 | 1.9469 | 2.1464 |
+ | 0.6913 | 0.0315 | 100 | 0.6911 | 0.0007 | -0.0036 | 0.6220 | 0.0042 | -36.2718 | -32.7285 | -1.6824 | -1.6348 |
+ | 0.6742 | 0.0629 | 200 | 0.6751 | 0.0003 | -0.0454 | 0.6276 | 0.0458 | -40.4596 | -32.7631 | -1.5097 | -1.4586 |
+ | 0.6081 | 0.0944 | 300 | 0.5872 | -0.5193 | -0.8644 | 0.6619 | 0.3451 | -122.3552 | -84.7303 | -0.4701 | -0.3830 |
+ | 0.4463 | 0.1258 | 400 | 0.3978 | -2.0312 | -3.2212 | 0.7190 | 1.1900 | -358.0407 | -235.9217 | -0.3673 | -0.2101 |
+ | 0.3548 | 0.1573 | 500 | 0.3048 | -2.5142 | -4.1605 | 0.7698 | 1.6464 | -451.9689 | -284.2137 | 0.4417 | 0.6033 |
+ | 0.3014 | 0.1887 | 600 | 0.2395 | -2.7662 | -4.8033 | 0.7963 | 2.0371 | -516.2451 | -309.4138 | 1.0026 | 1.1670 |
+ | 0.25 | 0.2202 | 700 | 0.1989 | -3.1039 | -5.4194 | 0.8235 | 2.3155 | -577.8538 | -343.1828 | 1.3421 | 1.5051 |
+ | 0.2163 | 0.2517 | 800 | 0.1564 | -3.4535 | -6.3881 | 0.8369 | 2.9346 | -674.7255 | -378.1511 | 1.8084 | 1.9697 |
+ | 0.178 | 0.2831 | 900 | 0.1349 | -3.4355 | -6.5411 | 0.8586 | 3.1056 | -690.0276 | -376.3503 | 1.7688 | 1.9492 |
+ | 0.1736 | 0.3146 | 1000 | 0.1127 | -3.5471 | -6.9599 | 0.8668 | 3.4128 | -731.9055 | -387.5069 | 2.0848 | 2.2440 |
+ | 0.1474 | 0.3460 | 1100 | 0.0982 | -3.6177 | -7.2322 | 0.8799 | 3.6145 | -759.1403 | -394.5700 | 1.8280 | 2.0076 |
+ | 0.1382 | 0.3775 | 1200 | 0.0819 | -4.3123 | -8.3603 | 0.8862 | 4.0480 | -871.9455 | -464.0287 | 2.0966 | 2.2833 |
+ | 0.1133 | 0.4089 | 1300 | 0.0714 | -4.0671 | -8.3309 | 0.8955 | 4.2638 | -869.0029 | -439.5055 | 1.9082 | 2.1044 |
+ | 0.1209 | 0.4404 | 1400 | 0.0634 | -4.8366 | -9.4739 | 0.8933 | 4.6374 | -983.3081 | -516.4533 | 2.0574 | 2.2678 |
+ | 0.1057 | 0.4718 | 1500 | 0.0575 | -4.1835 | -8.8581 | 0.9019 | 4.6746 | -921.7241 | -451.1488 | 2.0907 | 2.2780 |
+ | 0.1057 | 0.5033 | 1600 | 0.0536 | -4.2093 | -8.9250 | 0.9131 | 4.7157 | -928.4156 | -453.7231 | 2.0198 | 2.2136 |
+ | 0.0881 | 0.5348 | 1700 | 0.0490 | -4.4577 | -9.3694 | 0.9101 | 4.9118 | -972.8605 | -478.5644 | 1.8760 | 2.0804 |
+ | 0.0847 | 0.5662 | 1800 | 0.0441 | -4.2531 | -9.4108 | 0.9131 | 5.1578 | -977.0005 | -458.1054 | 2.0999 | 2.2904 |
+ | 0.0713 | 0.5977 | 1900 | 0.0411 | -4.4101 | -9.6543 | 0.9168 | 5.2442 | -1001.3448 | -473.8065 | 2.0887 | 2.2861 |
+ | 0.0553 | 0.6291 | 2000 | 0.0378 | -4.9687 | -10.5782 | 0.9123 | 5.6095 | -1093.7402 | -529.6686 | 2.0469 | 2.2608 |
+ | 0.0668 | 0.6606 | 2100 | 0.0362 | -4.7485 | -10.3227 | 0.9190 | 5.5741 | -1068.1823 | -507.6488 | 2.1354 | 2.3368 |
+ | 0.0528 | 0.6920 | 2200 | 0.0356 | -4.6766 | -10.2170 | 0.9175 | 5.5404 | -1057.6173 | -500.4605 | 1.9572 | 2.1594 |
+ | 0.0596 | 0.7235 | 2300 | 0.0340 | -4.6180 | -10.2121 | 0.9235 | 5.5942 | -1057.1299 | -494.5929 | 2.0041 | 2.2117 |
+ | 0.063 | 0.7550 | 2400 | 0.0328 | -4.5357 | -10.1876 | 0.9257 | 5.6519 | -1054.6713 | -486.3653 | 2.1493 | 2.3488 |
+ | 0.0558 | 0.7864 | 2500 | 0.0311 | -4.7155 | -10.5680 | 0.9261 | 5.8526 | -1092.7185 | -504.3435 | 2.1208 | 2.3275 |
+ | 0.0552 | 0.8179 | 2600 | 0.0312 | -4.6574 | -10.3658 | 0.9254 | 5.7084 | -1072.4943 | -498.5399 | 2.0544 | 2.2592 |
+ | 0.066 | 0.8493 | 2700 | 0.0305 | -4.6506 | -10.4766 | 0.9287 | 5.8259 | -1083.5740 | -497.8611 | 2.0914 | 2.2968 |
+ | 0.0568 | 0.8808 | 2800 | 0.0302 | -4.6423 | -10.4629 | 0.9302 | 5.8206 | -1082.2051 | -497.0266 | 2.0957 | 2.3026 |
+ | 0.0602 | 0.9122 | 2900 | 0.0299 | -4.6260 | -10.4608 | 0.9299 | 5.8348 | -1081.9958 | -495.3989 | 2.0861 | 2.2911 |
+ | 0.0634 | 0.9437 | 3000 | 0.0298 | -4.6454 | -10.4843 | 0.9313 | 5.8389 | -1084.3455 | -497.3409 | 2.0655 | 2.2739 |
+ | 0.0602 | 0.9751 | 3100 | 0.0299 | -4.6289 | -10.4404 | 0.9302 | 5.8116 | -1079.9603 | -495.6860 | 2.0537 | 2.2623 |


  ### Framework versions
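A note on the Rewards/* columns that changed above: under DPO (the `trl` and `dpo` tags in the card), the chosen and rejected rewards are beta-scaled log-probability ratios between the trained policy and the frozen reference model, and the margin is simply their difference. The sketch below is illustrative only; the `dpo_rewards` helper, the `beta` value, and the example log-probabilities are assumptions, since the actual hyperparameters are not part of this diff.

```python
# Minimal sketch of how DPO-style reward columns are commonly derived.
# `dpo_rewards` is a hypothetical helper and `beta` is an assumed value;
# neither comes from this repository's training code.

def dpo_rewards(policy_chosen_logp: float, ref_chosen_logp: float,
                policy_rejected_logp: float, ref_rejected_logp: float,
                beta: float = 0.01):
    """Return (reward_chosen, reward_rejected, margin) for one preference pair."""
    reward_chosen = beta * (policy_chosen_logp - ref_chosen_logp)
    reward_rejected = beta * (policy_rejected_logp - ref_rejected_logp)
    return reward_chosen, reward_rejected, reward_chosen - reward_rejected


# Illustrative, made-up log-probabilities: a more strongly penalized rejected
# response yields a positive margin, i.e. the pair is ranked correctly,
# which is what Rewards/accuracies counts.
print(dpo_rewards(-500.0, -33.0, -1080.0, -36.0))
```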
all_results.json CHANGED
@@ -1,22 +1,9 @@
  {
    "epoch": 1.0,
-   "eval_logits/chosen": 2.140789270401001,
-   "eval_logits/rejected": 1.9412086009979248,
-   "eval_logps/chosen": -503.5469665527344,
-   "eval_logps/rejected": -1102.4208984375,
-   "eval_loss": 0.028598472476005554,
-   "eval_rewards/accuracies": 0.9253731369972229,
-   "eval_rewards/chosen": -4.707759380340576,
-   "eval_rewards/margins": 5.957462787628174,
-   "eval_rewards/rejected": -10.66522216796875,
-   "eval_runtime": 214.7372,
-   "eval_samples": 21417,
-   "eval_samples_per_second": 99.736,
-   "eval_steps_per_second": 1.56,
    "total_flos": 0.0,
-   "train_loss": 0.19036180805619987,
-   "train_runtime": 15997.0818,
+   "train_loss": 0.18858218038370866,
+   "train_runtime": 16104.4918,
    "train_samples": 406907,
-   "train_samples_per_second": 25.436,
-   "train_steps_per_second": 0.199
+   "train_samples_per_second": 25.267,
+   "train_steps_per_second": 0.197
  }
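As a quick consistency check, the reported throughput follows directly from the sample count and runtime in the new values above. A minimal sketch (variable names are only for illustration):

```python
# Sanity check on the updated training statistics in all_results.json /
# train_results.json above.
train_samples = 406907
train_runtime_s = 16104.4918             # seconds
train_steps_per_second = 0.197

samples_per_second = train_samples / train_runtime_s
print(round(samples_per_second, 3))      # ~25.267, matching "train_samples_per_second"

# Implied optimizer steps for the run, consistent with the training-log table
# reaching step 3100 at epoch 0.9751.
print(round(train_steps_per_second * train_runtime_s))   # ~3173
```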
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
    "epoch": 1.0,
    "total_flos": 0.0,
-   "train_loss": 0.19036180805619987,
-   "train_runtime": 15997.0818,
+   "train_loss": 0.18858218038370866,
+   "train_runtime": 16104.4918,
    "train_samples": 406907,
-   "train_samples_per_second": 25.436,
-   "train_steps_per_second": 0.199
+   "train_samples_per_second": 25.267,
+   "train_steps_per_second": 0.197
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff