dball committed on
Commit
84f5259
1 Parent(s): e52e5b4

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,9 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
- datasets:
11
- - HuggingFaceH4/ultrafeedback_binarized
12
  base_model: mistralai/Mistral-7B-v0.1
13
  model-index:
14
  - name: zephyr-7b-dpo-qlora
@@ -20,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # zephyr-7b-dpo-qlora
22
 
23
- This model is a fine-tuned version of [dball/zephyr-7b-sft-qlora](https://huggingface.co/dball/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
24
  It achieves the following results on the evaluation set:
25
  - Loss: 0.5058
26
  - Rewards/chosen: -2.0144
@@ -63,84 +59,84 @@ The following hyperparameters were used during training:
63
 
64
  ### Training results
65
 
66
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
67
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
68
- | 0.6934 | 0.01 | 100 | 0.6931 | 0.0002 | 0.0001 | 0.5105 | 0.0001 | -248.5731 | -268.4692 | -2.4383 | -2.5261 |
69
- | 0.6924 | 0.03 | 200 | 0.6926 | 0.0014 | 0.0003 | 0.5605 | 0.0011 | -248.5511 | -268.3451 | -2.4368 | -2.5247 |
70
- | 0.691 | 0.04 | 300 | 0.6907 | 0.0091 | 0.0041 | 0.6440 | 0.0050 | -248.1753 | -267.5839 | -2.4378 | -2.5253 |
71
- | 0.6876 | 0.05 | 400 | 0.6845 | 0.0405 | 0.0227 | 0.6580 | 0.0178 | -246.3089 | -264.4353 | -2.4351 | -2.5230 |
72
- | 0.6799 | 0.07 | 500 | 0.6707 | 0.0354 | -0.0135 | 0.6815 | 0.0489 | -249.9276 | -264.9495 | -2.3755 | -2.4660 |
73
- | 0.6577 | 0.08 | 600 | 0.6462 | -0.1230 | -0.2378 | 0.6750 | 0.1148 | -272.3604 | -280.7885 | -2.2541 | -2.3601 |
74
- | 0.6365 | 0.09 | 700 | 0.6345 | -0.0856 | -0.2362 | 0.6860 | 0.1507 | -272.2037 | -277.0453 | -2.2013 | -2.3136 |
75
- | 0.6519 | 0.1 | 800 | 0.6240 | -0.4943 | -0.7231 | 0.6630 | 0.2287 | -320.8872 | -317.9223 | -2.0482 | -2.1835 |
76
- | 0.6547 | 0.12 | 900 | 0.6203 | -0.5733 | -0.8287 | 0.6695 | 0.2555 | -331.4542 | -325.8177 | -2.0783 | -2.2184 |
77
- | 0.5841 | 0.13 | 1000 | 0.6071 | -0.5361 | -0.8600 | 0.6820 | 0.3239 | -334.5816 | -322.0998 | -2.0689 | -2.2086 |
78
- | 0.5877 | 0.14 | 1100 | 0.5947 | -1.1495 | -1.6229 | 0.6855 | 0.4734 | -410.8678 | -383.4380 | -1.1053 | -1.3836 |
79
- | 0.5552 | 0.16 | 1200 | 0.5909 | -1.4256 | -1.8934 | 0.6880 | 0.4678 | -437.9200 | -411.0459 | -0.3614 | -0.7372 |
80
- | 0.5492 | 0.17 | 1300 | 0.5791 | -1.4614 | -1.9771 | 0.6935 | 0.5157 | -446.2910 | -414.6323 | -0.1933 | -0.5949 |
81
- | 0.5789 | 0.18 | 1400 | 0.5771 | -0.8799 | -1.3633 | 0.7035 | 0.4834 | -384.9109 | -356.4832 | -0.1908 | -0.5846 |
82
- | 0.5456 | 0.2 | 1500 | 0.5646 | -1.1845 | -1.7913 | 0.7035 | 0.6068 | -427.7158 | -386.9436 | 0.3098 | -0.1574 |
83
- | 0.4722 | 0.21 | 1600 | 0.5598 | -1.3242 | -1.9424 | 0.7075 | 0.6181 | -442.8174 | -400.9113 | 0.5395 | 0.0346 |
84
- | 0.5072 | 0.22 | 1700 | 0.5574 | -1.5040 | -2.1667 | 0.7060 | 0.6628 | -465.2537 | -418.8860 | 1.0411 | 0.4657 |
85
- | 0.5284 | 0.24 | 1800 | 0.5534 | -1.5486 | -2.2055 | 0.7070 | 0.6568 | -469.1293 | -423.3542 | 1.2404 | 0.6528 |
86
- | 0.5623 | 0.25 | 1900 | 0.5625 | -1.7106 | -2.4247 | 0.7055 | 0.7141 | -491.0526 | -439.5539 | 0.7808 | 0.3058 |
87
- | 0.6092 | 0.26 | 2000 | 0.5501 | -1.0158 | -1.6513 | 0.7085 | 0.6354 | -413.7089 | -370.0728 | 0.5199 | 0.0079 |
88
- | 0.5726 | 0.27 | 2100 | 0.5433 | -1.4697 | -2.1580 | 0.7150 | 0.6884 | -464.3842 | -415.4569 | 0.9981 | 0.4405 |
89
- | 0.5323 | 0.29 | 2200 | 0.5483 | -1.3173 | -2.0886 | 0.7150 | 0.7713 | -457.4451 | -400.2244 | 1.3533 | 0.7445 |
90
- | 0.5148 | 0.3 | 2300 | 0.5387 | -1.3194 | -2.0188 | 0.7275 | 0.6994 | -450.4646 | -400.4308 | 1.1454 | 0.5107 |
91
- | 0.4112 | 0.31 | 2400 | 0.5401 | -1.6201 | -2.4219 | 0.7200 | 0.8018 | -490.7723 | -430.5040 | 1.2866 | 0.6648 |
92
- | 0.5246 | 0.33 | 2500 | 0.5413 | -2.1278 | -2.8964 | 0.7220 | 0.7686 | -538.2222 | -481.2729 | 1.7388 | 1.0914 |
93
- | 0.5657 | 0.34 | 2600 | 0.5373 | -1.6863 | -2.4642 | 0.7200 | 0.7779 | -495.0003 | -437.1172 | 1.6571 | 0.9886 |
94
- | 0.5216 | 0.35 | 2700 | 0.5357 | -1.9895 | -2.7395 | 0.7260 | 0.7500 | -522.5278 | -467.4365 | 1.7936 | 1.1290 |
95
- | 0.5865 | 0.37 | 2800 | 0.5351 | -2.1007 | -2.8103 | 0.7260 | 0.7096 | -529.6149 | -478.5605 | 1.7565 | 1.1019 |
96
- | 0.5252 | 0.38 | 2900 | 0.5376 | -1.5816 | -2.4416 | 0.7205 | 0.8600 | -492.7397 | -426.6496 | 1.5686 | 0.9108 |
97
- | 0.5381 | 0.39 | 3000 | 0.5306 | -1.5416 | -2.3719 | 0.7230 | 0.8303 | -485.7741 | -422.6485 | 1.7206 | 1.0233 |
98
- | 0.4587 | 0.41 | 3100 | 0.5222 | -1.4511 | -2.1850 | 0.7260 | 0.7339 | -467.0778 | -413.6005 | 1.8445 | 1.1221 |
99
- | 0.5173 | 0.42 | 3200 | 0.5277 | -1.3551 | -2.1383 | 0.7260 | 0.7832 | -462.4095 | -403.9989 | 1.6186 | 0.8981 |
100
- | 0.5851 | 0.43 | 3300 | 0.5181 | -1.6864 | -2.5011 | 0.7325 | 0.8148 | -498.6931 | -437.1258 | 2.0344 | 1.2860 |
101
- | 0.5811 | 0.44 | 3400 | 0.5166 | -1.6007 | -2.4386 | 0.7335 | 0.8379 | -492.4408 | -428.5590 | 1.7238 | 1.0162 |
102
- | 0.4892 | 0.46 | 3500 | 0.5257 | -1.4712 | -2.3237 | 0.7280 | 0.8525 | -480.9519 | -415.6104 | 2.0709 | 1.3014 |
103
- | 0.5438 | 0.47 | 3600 | 0.5252 | -1.5967 | -2.4449 | 0.7275 | 0.8482 | -493.0664 | -428.1592 | 2.2020 | 1.4150 |
104
- | 0.5677 | 0.48 | 3700 | 0.5152 | -1.9726 | -2.8128 | 0.7275 | 0.8402 | -529.8630 | -465.7504 | 2.4678 | 1.6843 |
105
- | 0.5471 | 0.5 | 3800 | 0.5240 | -2.0731 | -3.0300 | 0.7255 | 0.9569 | -551.5833 | -475.7978 | 2.2022 | 1.4352 |
106
- | 0.5193 | 0.51 | 3900 | 0.5185 | -2.1713 | -3.1118 | 0.7340 | 0.9405 | -559.7596 | -485.6194 | 2.1469 | 1.3990 |
107
- | 0.5764 | 0.52 | 4000 | 0.5177 | -2.0057 | -2.9735 | 0.7310 | 0.9678 | -545.9298 | -469.0576 | 1.8653 | 1.1192 |
108
- | 0.504 | 0.54 | 4100 | 0.5180 | -1.8237 | -2.7453 | 0.7270 | 0.9217 | -523.1135 | -450.8565 | 1.7948 | 1.0344 |
109
- | 0.4846 | 0.55 | 4200 | 0.5168 | -2.1214 | -3.0448 | 0.7260 | 0.9234 | -553.0635 | -480.6317 | 2.1064 | 1.3329 |
110
- | 0.426 | 0.56 | 4300 | 0.5096 | -2.0142 | -2.9490 | 0.7325 | 0.9349 | -543.4855 | -469.9074 | 2.0377 | 1.2900 |
111
- | 0.5289 | 0.58 | 4400 | 0.5143 | -1.9624 | -2.9368 | 0.7260 | 0.9744 | -542.2659 | -464.7332 | 1.7669 | 1.0286 |
112
- | 0.4542 | 0.59 | 4500 | 0.5102 | -1.9643 | -2.9280 | 0.7335 | 0.9637 | -541.3861 | -464.9223 | 1.8775 | 1.1395 |
113
- | 0.4839 | 0.6 | 4600 | 0.5094 | -2.0037 | -2.9783 | 0.7305 | 0.9747 | -546.4150 | -468.8564 | 1.8858 | 1.1472 |
114
- | 0.5562 | 0.62 | 4700 | 0.5076 | -2.0260 | -2.9819 | 0.7340 | 0.9559 | -546.7677 | -471.0873 | 1.9384 | 1.1999 |
115
- | 0.4964 | 0.63 | 4800 | 0.5078 | -2.1724 | -3.1285 | 0.7335 | 0.9561 | -561.4290 | -485.7305 | 2.1538 | 1.3968 |
116
- | 0.4879 | 0.64 | 4900 | 0.5125 | -2.2107 | -3.2298 | 0.7310 | 1.0191 | -571.5599 | -489.5623 | 2.1324 | 1.3802 |
117
- | 0.4916 | 0.65 | 5000 | 0.5087 | -2.0966 | -3.1006 | 0.7300 | 1.0041 | -558.6430 | -478.1451 | 2.1161 | 1.3780 |
118
- | 0.5806 | 0.67 | 5100 | 0.5089 | -2.2279 | -3.2378 | 0.7305 | 1.0099 | -572.3604 | -491.2838 | 2.0897 | 1.3595 |
119
- | 0.5027 | 0.68 | 5200 | 0.5038 | -1.8962 | -2.8326 | 0.7375 | 0.9364 | -531.8434 | -458.1095 | 1.8014 | 1.0714 |
120
- | 0.4554 | 0.69 | 5300 | 0.5052 | -1.9550 | -2.9208 | 0.7330 | 0.9658 | -540.6600 | -463.9870 | 1.8905 | 1.1555 |
121
- | 0.4521 | 0.71 | 5400 | 0.5039 | -1.9912 | -2.9472 | 0.7370 | 0.9559 | -543.2982 | -467.6124 | 1.8437 | 1.1076 |
122
- | 0.5869 | 0.72 | 5500 | 0.5054 | -2.1704 | -3.1637 | 0.7360 | 0.9933 | -564.9521 | -485.5281 | 1.8865 | 1.1574 |
123
- | 0.5924 | 0.73 | 5600 | 0.5064 | -1.8180 | -2.7843 | 0.7320 | 0.9663 | -527.0139 | -450.2935 | 1.5325 | 0.8215 |
124
- | 0.4275 | 0.75 | 5700 | 0.5055 | -2.0070 | -3.0130 | 0.7340 | 1.0060 | -549.8819 | -469.1932 | 1.7229 | 0.9960 |
125
- | 0.4746 | 0.76 | 5800 | 0.5072 | -2.2069 | -3.2470 | 0.7300 | 1.0401 | -573.2806 | -489.1825 | 1.8507 | 1.1168 |
126
- | 0.5033 | 0.77 | 5900 | 0.5061 | -1.8962 | -2.8744 | 0.7275 | 0.9782 | -536.0162 | -458.1062 | 1.7071 | 0.9675 |
127
- | 0.4517 | 0.79 | 6000 | 0.5105 | -1.7324 | -2.6813 | 0.7265 | 0.9489 | -516.7132 | -441.7279 | 1.5613 | 0.8156 |
128
- | 0.5071 | 0.8 | 6100 | 0.5116 | -1.8634 | -2.8617 | 0.7275 | 0.9983 | -534.7506 | -454.8272 | 1.6895 | 0.9370 |
129
- | 0.6455 | 0.81 | 6200 | 0.5110 | -1.8796 | -2.8743 | 0.7250 | 0.9947 | -536.0126 | -456.4508 | 1.7120 | 0.9542 |
130
- | 0.4796 | 0.82 | 6300 | 0.5112 | -1.9250 | -2.9447 | 0.7260 | 1.0197 | -543.0519 | -460.9879 | 1.7784 | 1.0203 |
131
- | 0.5568 | 0.84 | 6400 | 0.5086 | -1.9539 | -2.9695 | 0.7275 | 1.0156 | -545.5328 | -463.8810 | 1.8764 | 1.1152 |
132
- | 0.4335 | 0.85 | 6500 | 0.5067 | -2.0048 | -3.0192 | 0.7295 | 1.0144 | -550.4982 | -468.9681 | 1.9425 | 1.1822 |
133
- | 0.5263 | 0.86 | 6600 | 0.5066 | -1.9682 | -2.9769 | 0.7310 | 1.0087 | -546.2759 | -465.3099 | 1.9390 | 1.1806 |
134
- | 0.5263 | 0.88 | 6700 | 0.5066 | -1.9719 | -2.9803 | 0.7320 | 1.0084 | -546.6119 | -465.6784 | 1.9366 | 1.1794 |
135
- | 0.4939 | 0.89 | 6800 | 0.5063 | -2.0205 | -3.0328 | 0.7325 | 1.0123 | -551.8629 | -470.5374 | 1.9795 | 1.2238 |
136
- | 0.5763 | 0.9 | 6900 | 0.5060 | -2.0098 | -3.0191 | 0.7330 | 1.0092 | -550.4863 | -469.4713 | 1.9579 | 1.2027 |
137
- | 0.5062 | 0.92 | 7000 | 0.5059 | -2.0030 | -3.0107 | 0.7320 | 1.0077 | -549.6514 | -468.7946 | 1.9574 | 1.2018 |
138
- | 0.4432 | 0.93 | 7100 | 0.5059 | -2.0132 | -3.0218 | 0.7330 | 1.0085 | -550.7594 | -469.8141 | 1.9675 | 1.2115 |
139
- | 0.5294 | 0.94 | 7200 | 0.5059 | -2.0141 | -3.0230 | 0.7315 | 1.0089 | -550.8820 | -469.9014 | 1.9679 | 1.2123 |
140
- | 0.4488 | 0.96 | 7300 | 0.5058 | -2.0144 | -3.0239 | 0.7320 | 1.0095 | -550.9682 | -469.9289 | 1.9688 | 1.2130 |
141
- | 0.4747 | 0.97 | 7400 | 0.5057 | -2.0142 | -3.0234 | 0.7325 | 1.0092 | -550.9178 | -469.9052 | 1.9679 | 1.2122 |
142
- | 0.4494 | 0.98 | 7500 | 0.5058 | -2.0144 | -3.0238 | 0.7350 | 1.0093 | -550.9584 | -469.9345 | 1.9679 | 1.2121 |
143
- | 0.5319 | 0.99 | 7600 | 0.5058 | -2.0144 | -3.0238 | 0.7350 | 1.0093 | -550.9584 | -469.9345 | 1.9679 | 1.2121 |
144
 
145
 
146
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
8
  base_model: mistralai/Mistral-7B-v0.1
9
  model-index:
10
  - name: zephyr-7b-dpo-qlora
 
16
 
17
  # zephyr-7b-dpo-qlora
18
 
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrafeedback_binarized dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.5058
22
  - Rewards/chosen: -2.0144
 
59
 
60
  ### Training results
61
 
62
+ | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
63
+ |:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
64
+ | 0.6934 | 0.01 | 100 | -2.5261 | -2.4383 | -268.4692 | -248.5731 | 0.6931 | 0.5105 | 0.0002 | 0.0001 | 0.0001 |
65
+ | 0.6924 | 0.03 | 200 | -2.5247 | -2.4368 | -268.3451 | -248.5511 | 0.6926 | 0.5605 | 0.0014 | 0.0011 | 0.0003 |
66
+ | 0.691 | 0.04 | 300 | -2.5253 | -2.4378 | -267.5839 | -248.1753 | 0.6907 | 0.6440 | 0.0091 | 0.0050 | 0.0041 |
67
+ | 0.6876 | 0.05 | 400 | -2.5230 | -2.4351 | -264.4353 | -246.3089 | 0.6845 | 0.6580 | 0.0405 | 0.0178 | 0.0227 |
68
+ | 0.6799 | 0.07 | 500 | -2.4660 | -2.3755 | -264.9495 | -249.9276 | 0.6707 | 0.6815 | 0.0354 | 0.0489 | -0.0135 |
69
+ | 0.6577 | 0.08 | 600 | -2.3601 | -2.2541 | -280.7885 | -272.3604 | 0.6462 | 0.6750 | -0.1230 | 0.1148 | -0.2378 |
70
+ | 0.6365 | 0.09 | 700 | -2.3136 | -2.2013 | -277.0453 | -272.2037 | 0.6345 | 0.6860 | -0.0856 | 0.1507 | -0.2362 |
71
+ | 0.6519 | 0.1 | 800 | -2.1835 | -2.0482 | -317.9223 | -320.8872 | 0.6240 | 0.6630 | -0.4943 | 0.2287 | -0.7231 |
72
+ | 0.6547 | 0.12 | 900 | -2.2184 | -2.0783 | -325.8177 | -331.4542 | 0.6203 | 0.6695 | -0.5733 | 0.2555 | -0.8287 |
73
+ | 0.5841 | 0.13 | 1000 | -2.2086 | -2.0689 | -322.0998 | -334.5816 | 0.6071 | 0.6820 | -0.5361 | 0.3239 | -0.8600 |
74
+ | 0.5877 | 0.14 | 1100 | -1.3836 | -1.1053 | -383.4380 | -410.8678 | 0.5947 | 0.6855 | -1.1495 | 0.4734 | -1.6229 |
75
+ | 0.5552 | 0.16 | 1200 | -0.7372 | -0.3614 | -411.0459 | -437.9200 | 0.5909 | 0.6880 | -1.4256 | 0.4678 | -1.8934 |
76
+ | 0.5492 | 0.17 | 1300 | -0.5949 | -0.1933 | -414.6323 | -446.2910 | 0.5791 | 0.6935 | -1.4614 | 0.5157 | -1.9771 |
77
+ | 0.5789 | 0.18 | 1400 | -0.5846 | -0.1908 | -356.4832 | -384.9109 | 0.5771 | 0.7035 | -0.8799 | 0.4834 | -1.3633 |
78
+ | 0.5456 | 0.2 | 1500 | -0.1574 | 0.3098 | -386.9436 | -427.7158 | 0.5646 | 0.7035 | -1.1845 | 0.6068 | -1.7913 |
79
+ | 0.4722 | 0.21 | 1600 | 0.0346 | 0.5395 | -400.9113 | -442.8174 | 0.5598 | 0.7075 | -1.3242 | 0.6181 | -1.9424 |
80
+ | 0.5072 | 0.22 | 1700 | 0.4657 | 1.0411 | -418.8860 | -465.2537 | 0.5574 | 0.7060 | -1.5040 | 0.6628 | -2.1667 |
81
+ | 0.5284 | 0.24 | 1800 | 0.6528 | 1.2404 | -423.3542 | -469.1293 | 0.5534 | 0.7070 | -1.5486 | 0.6568 | -2.2055 |
82
+ | 0.5623 | 0.25 | 1900 | 0.3058 | 0.7808 | -439.5539 | -491.0526 | 0.5625 | 0.7055 | -1.7106 | 0.7141 | -2.4247 |
83
+ | 0.6092 | 0.26 | 2000 | 0.0079 | 0.5199 | -370.0728 | -413.7089 | 0.5501 | 0.7085 | -1.0158 | 0.6354 | -1.6513 |
84
+ | 0.5726 | 0.27 | 2100 | 0.4405 | 0.9981 | -415.4569 | -464.3842 | 0.5433 | 0.7150 | -1.4697 | 0.6884 | -2.1580 |
85
+ | 0.5323 | 0.29 | 2200 | 0.7445 | 1.3533 | -400.2244 | -457.4451 | 0.5483 | 0.7150 | -1.3173 | 0.7713 | -2.0886 |
86
+ | 0.5148 | 0.3 | 2300 | 0.5107 | 1.1454 | -400.4308 | -450.4646 | 0.5387 | 0.7275 | -1.3194 | 0.6994 | -2.0188 |
87
+ | 0.4112 | 0.31 | 2400 | 0.6648 | 1.2866 | -430.5040 | -490.7723 | 0.5401 | 0.7200 | -1.6201 | 0.8018 | -2.4219 |
88
+ | 0.5246 | 0.33 | 2500 | 1.0914 | 1.7388 | -481.2729 | -538.2222 | 0.5413 | 0.7220 | -2.1278 | 0.7686 | -2.8964 |
89
+ | 0.5657 | 0.34 | 2600 | 0.9886 | 1.6571 | -437.1172 | -495.0003 | 0.5373 | 0.7200 | -1.6863 | 0.7779 | -2.4642 |
90
+ | 0.5216 | 0.35 | 2700 | 1.1290 | 1.7936 | -467.4365 | -522.5278 | 0.5357 | 0.7260 | -1.9895 | 0.7500 | -2.7395 |
91
+ | 0.5865 | 0.37 | 2800 | 1.1019 | 1.7565 | -478.5605 | -529.6149 | 0.5351 | 0.7260 | -2.1007 | 0.7096 | -2.8103 |
92
+ | 0.5252 | 0.38 | 2900 | 0.9108 | 1.5686 | -426.6496 | -492.7397 | 0.5376 | 0.7205 | -1.5816 | 0.8600 | -2.4416 |
93
+ | 0.5381 | 0.39 | 3000 | 1.0233 | 1.7206 | -422.6485 | -485.7741 | 0.5306 | 0.7230 | -1.5416 | 0.8303 | -2.3719 |
94
+ | 0.4587 | 0.41 | 3100 | 1.1221 | 1.8445 | -413.6005 | -467.0778 | 0.5222 | 0.7260 | -1.4511 | 0.7339 | -2.1850 |
95
+ | 0.5173 | 0.42 | 3200 | 0.8981 | 1.6186 | -403.9989 | -462.4095 | 0.5277 | 0.7260 | -1.3551 | 0.7832 | -2.1383 |
96
+ | 0.5851 | 0.43 | 3300 | 1.2860 | 2.0344 | -437.1258 | -498.6931 | 0.5181 | 0.7325 | -1.6864 | 0.8148 | -2.5011 |
97
+ | 0.5811 | 0.44 | 3400 | 1.0162 | 1.7238 | -428.5590 | -492.4408 | 0.5166 | 0.7335 | -1.6007 | 0.8379 | -2.4386 |
98
+ | 0.4892 | 0.46 | 3500 | 1.3014 | 2.0709 | -415.6104 | -480.9519 | 0.5257 | 0.7280 | -1.4712 | 0.8525 | -2.3237 |
99
+ | 0.5438 | 0.47 | 3600 | 1.4150 | 2.2020 | -428.1592 | -493.0664 | 0.5252 | 0.7275 | -1.5967 | 0.8482 | -2.4449 |
100
+ | 0.5677 | 0.48 | 3700 | 1.6843 | 2.4678 | -465.7504 | -529.8630 | 0.5152 | 0.7275 | -1.9726 | 0.8402 | -2.8128 |
101
+ | 0.5471 | 0.5 | 3800 | 1.4352 | 2.2022 | -475.7978 | -551.5833 | 0.5240 | 0.7255 | -2.0731 | 0.9569 | -3.0300 |
102
+ | 0.5193 | 0.51 | 3900 | 1.3990 | 2.1469 | -485.6194 | -559.7596 | 0.5185 | 0.7340 | -2.1713 | 0.9405 | -3.1118 |
103
+ | 0.5764 | 0.52 | 4000 | 1.1192 | 1.8653 | -469.0576 | -545.9298 | 0.5177 | 0.7310 | -2.0057 | 0.9678 | -2.9735 |
104
+ | 0.504 | 0.54 | 4100 | 1.0344 | 1.7948 | -450.8565 | -523.1135 | 0.5180 | 0.7270 | -1.8237 | 0.9217 | -2.7453 |
105
+ | 0.4846 | 0.55 | 4200 | 1.3329 | 2.1064 | -480.6317 | -553.0635 | 0.5168 | 0.7260 | -2.1214 | 0.9234 | -3.0448 |
106
+ | 0.426 | 0.56 | 4300 | 1.2900 | 2.0377 | -469.9074 | -543.4855 | 0.5096 | 0.7325 | -2.0142 | 0.9349 | -2.9490 |
107
+ | 0.5289 | 0.58 | 4400 | 1.0286 | 1.7669 | -464.7332 | -542.2659 | 0.5143 | 0.7260 | -1.9624 | 0.9744 | -2.9368 |
108
+ | 0.4542 | 0.59 | 4500 | 1.1395 | 1.8775 | -464.9223 | -541.3861 | 0.5102 | 0.7335 | -1.9643 | 0.9637 | -2.9280 |
109
+ | 0.4839 | 0.6 | 4600 | 1.1472 | 1.8858 | -468.8564 | -546.4150 | 0.5094 | 0.7305 | -2.0037 | 0.9747 | -2.9783 |
110
+ | 0.5562 | 0.62 | 4700 | 1.1999 | 1.9384 | -471.0873 | -546.7677 | 0.5076 | 0.7340 | -2.0260 | 0.9559 | -2.9819 |
111
+ | 0.4964 | 0.63 | 4800 | 1.3968 | 2.1538 | -485.7305 | -561.4290 | 0.5078 | 0.7335 | -2.1724 | 0.9561 | -3.1285 |
112
+ | 0.4879 | 0.64 | 4900 | 1.3802 | 2.1324 | -489.5623 | -571.5599 | 0.5125 | 0.7310 | -2.2107 | 1.0191 | -3.2298 |
113
+ | 0.4916 | 0.65 | 5000 | 1.3780 | 2.1161 | -478.1451 | -558.6430 | 0.5087 | 0.7300 | -2.0966 | 1.0041 | -3.1006 |
114
+ | 0.5806 | 0.67 | 5100 | 1.3595 | 2.0897 | -491.2838 | -572.3604 | 0.5089 | 0.7305 | -2.2279 | 1.0099 | -3.2378 |
115
+ | 0.5027 | 0.68 | 5200 | 1.0714 | 1.8014 | -458.1095 | -531.8434 | 0.5038 | 0.7375 | -1.8962 | 0.9364 | -2.8326 |
116
+ | 0.4554 | 0.69 | 5300 | 1.1555 | 1.8905 | -463.9870 | -540.6600 | 0.5052 | 0.7330 | -1.9550 | 0.9658 | -2.9208 |
117
+ | 0.4521 | 0.71 | 5400 | 1.1076 | 1.8437 | -467.6124 | -543.2982 | 0.5039 | 0.7370 | -1.9912 | 0.9559 | -2.9472 |
118
+ | 0.5869 | 0.72 | 5500 | 1.1574 | 1.8865 | -485.5281 | -564.9521 | 0.5054 | 0.7360 | -2.1704 | 0.9933 | -3.1637 |
119
+ | 0.5924 | 0.73 | 5600 | 0.8215 | 1.5325 | -450.2935 | -527.0139 | 0.5064 | 0.7320 | -1.8180 | 0.9663 | -2.7843 |
120
+ | 0.4275 | 0.75 | 5700 | 0.9960 | 1.7229 | -469.1932 | -549.8819 | 0.5055 | 0.7340 | -2.0070 | 1.0060 | -3.0130 |
121
+ | 0.4746 | 0.76 | 5800 | 1.1168 | 1.8507 | -489.1825 | -573.2806 | 0.5072 | 0.7300 | -2.2069 | 1.0401 | -3.2470 |
122
+ | 0.5033 | 0.77 | 5900 | 0.9675 | 1.7071 | -458.1062 | -536.0162 | 0.5061 | 0.7275 | -1.8962 | 0.9782 | -2.8744 |
123
+ | 0.4517 | 0.79 | 6000 | 0.8156 | 1.5613 | -441.7279 | -516.7132 | 0.5105 | 0.7265 | -1.7324 | 0.9489 | -2.6813 |
124
+ | 0.5071 | 0.8 | 6100 | 0.9370 | 1.6895 | -454.8272 | -534.7506 | 0.5116 | 0.7275 | -1.8634 | 0.9983 | -2.8617 |
125
+ | 0.6455 | 0.81 | 6200 | 0.9542 | 1.7120 | -456.4508 | -536.0126 | 0.5110 | 0.7250 | -1.8796 | 0.9947 | -2.8743 |
126
+ | 0.4796 | 0.82 | 6300 | 1.0203 | 1.7784 | -460.9879 | -543.0519 | 0.5112 | 0.7260 | -1.9250 | 1.0197 | -2.9447 |
127
+ | 0.5568 | 0.84 | 6400 | 1.1152 | 1.8764 | -463.8810 | -545.5328 | 0.5086 | 0.7275 | -1.9539 | 1.0156 | -2.9695 |
128
+ | 0.4335 | 0.85 | 6500 | 1.1822 | 1.9425 | -468.9681 | -550.4982 | 0.5067 | 0.7295 | -2.0048 | 1.0144 | -3.0192 |
129
+ | 0.5263 | 0.86 | 6600 | 1.1806 | 1.9390 | -465.3099 | -546.2759 | 0.5066 | 0.7310 | -1.9682 | 1.0087 | -2.9769 |
130
+ | 0.5263 | 0.88 | 6700 | 1.1794 | 1.9366 | -465.6784 | -546.6119 | 0.5066 | 0.7320 | -1.9719 | 1.0084 | -2.9803 |
131
+ | 0.4939 | 0.89 | 6800 | 1.2238 | 1.9795 | -470.5374 | -551.8629 | 0.5063 | 0.7325 | -2.0205 | 1.0123 | -3.0328 |
132
+ | 0.5763 | 0.9 | 6900 | 1.2027 | 1.9579 | -469.4713 | -550.4863 | 0.5060 | 0.7330 | -2.0098 | 1.0092 | -3.0191 |
133
+ | 0.5062 | 0.92 | 7000 | 1.2018 | 1.9574 | -468.7946 | -549.6514 | 0.5059 | 0.7320 | -2.0030 | 1.0077 | -3.0107 |
134
+ | 0.4432 | 0.93 | 7100 | 1.2115 | 1.9675 | -469.8141 | -550.7594 | 0.5059 | 0.7330 | -2.0132 | 1.0085 | -3.0218 |
135
+ | 0.5294 | 0.94 | 7200 | 1.2123 | 1.9679 | -469.9014 | -550.8820 | 0.5059 | 0.7315 | -2.0141 | 1.0089 | -3.0230 |
136
+ | 0.4488 | 0.96 | 7300 | 1.2130 | 1.9688 | -469.9289 | -550.9682 | 0.5058 | 0.7320 | -2.0144 | 1.0095 | -3.0239 |
137
+ | 0.4747 | 0.97 | 7400 | 1.2122 | 1.9679 | -469.9052 | -550.9178 | 0.5057 | 0.7325 | -2.0142 | 1.0092 | -3.0234 |
138
+ | 0.4494 | 0.98 | 7500 | 1.2121 | 1.9679 | -469.9345 | -550.9584 | 0.5058 | 0.7350 | -2.0144 | 1.0093 | -3.0238 |
139
+ | 0.5319 | 0.99 | 7600 | 1.2121 | 1.9679 | -469.9345 | -550.9584 | 0.5058 | 0.7350 | -2.0144 | 1.0093 | -3.0238 |
140
 
141
 
142
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "q_proj",
23
- "up_proj",
24
- "o_proj",
25
- "k_proj",
26
  "v_proj",
27
  "gate_proj",
28
- "down_proj"
 
 
 
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
 
 
22
  "v_proj",
23
  "gate_proj",
24
+ "o_proj",
25
+ "k_proj",
26
+ "up_proj",
27
+ "down_proj",
28
+ "q_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
all_results.json CHANGED
@@ -9,13 +9,13 @@
9
  "eval_rewards/chosen": -2.0144448280334473,
10
  "eval_rewards/margins": 1.0093281269073486,
11
  "eval_rewards/rejected": -3.023772954940796,
12
- "eval_runtime": 1197.4412,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 1.67,
15
- "eval_steps_per_second": 0.835,
16
- "train_loss": 0.539378755131,
17
- "train_runtime": 172496.8646,
18
  "train_samples": 61135,
19
- "train_samples_per_second": 0.354,
20
- "train_steps_per_second": 0.044
21
  }
 
9
  "eval_rewards/chosen": -2.0144448280334473,
10
  "eval_rewards/margins": 1.0093281269073486,
11
  "eval_rewards/rejected": -3.023772954940796,
12
+ "eval_runtime": 1180.5777,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 1.694,
15
+ "eval_steps_per_second": 0.847,
16
+ "train_loss": 0.0026284047499827543,
17
+ "train_runtime": 361.3652,
18
  "train_samples": 61135,
19
+ "train_samples_per_second": 169.178,
20
+ "train_steps_per_second": 21.145
21
  }
eval_results.json CHANGED
@@ -9,8 +9,8 @@
9
  "eval_rewards/chosen": -2.0144448280334473,
10
  "eval_rewards/margins": 1.0093281269073486,
11
  "eval_rewards/rejected": -3.023772954940796,
12
- "eval_runtime": 1197.4412,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 1.67,
15
- "eval_steps_per_second": 0.835
16
  }
 
9
  "eval_rewards/chosen": -2.0144448280334473,
10
  "eval_rewards/margins": 1.0093281269073486,
11
  "eval_rewards/rejected": -3.023772954940796,
12
+ "eval_runtime": 1180.5777,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 1.694,
15
+ "eval_steps_per_second": 0.847
16
  }
runs/Jan25_08-45-35_897544936d51/events.out.tfevents.1706172432.897544936d51.14729.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43263219c6ef905a5e9b2e8d04b88fba5e7714fcfbff1319d2891e798e646198
3
+ size 7439
runs/Jan25_08-45-35_897544936d51/events.out.tfevents.1706173974.897544936d51.14729.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f839ce7963a40ec2bf5bf45a1cbb8c8a6ef7e3adce2724393423567597b83d7
3
+ size 828
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.539378755131,
4
- "train_runtime": 172496.8646,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 0.354,
7
- "train_steps_per_second": 0.044
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.0026284047499827543,
4
+ "train_runtime": 361.3652,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 169.178,
7
+ "train_steps_per_second": 21.145
8
  }
trainer_state.json CHANGED
@@ -11938,10 +11938,10 @@
11938
  "epoch": 1.0,
11939
  "step": 7641,
11940
  "total_flos": 0.0,
11941
- "train_loss": 0.539378755131,
11942
- "train_runtime": 172496.8646,
11943
- "train_samples_per_second": 0.354,
11944
- "train_steps_per_second": 0.044
11945
  }
11946
  ],
11947
  "logging_steps": 10,
 
11938
  "epoch": 1.0,
11939
  "step": 7641,
11940
  "total_flos": 0.0,
11941
+ "train_loss": 0.0026284047499827543,
11942
+ "train_runtime": 361.3652,
11943
+ "train_samples_per_second": 169.178,
11944
+ "train_steps_per_second": 21.145
11945
  }
11946
  ],
11947
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a3212819d9b82087439df507e912d8ba136160346ae0b2040b4c1a75f39c92b
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3066614533d569ebba8e0193f5911e3dd110cfda1dc478e054931f7a0e36d9c
3
  size 4856