Minbyul committed on
Commit
b6efc6f
1 Parent(s): 07e2c00

Model save

Browse files
README.md CHANGED
@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.3352
21
- - Rewards/chosen: -1.8457
22
- - Rewards/rejected: -5.5205
23
- - Rewards/accuracies: 0.8729
24
- - Rewards/margins: 3.6748
25
- - Logps/rejected: -1564.3583
26
- - Logps/chosen: -751.3719
27
- - Logits/rejected: -2.9460
28
- - Logits/chosen: -3.1365
29
 
30
  ## Model description
31
 
@@ -60,9 +60,9 @@ The following hyperparameters were used during training:
60
 
61
  ### Training results
62
 
63
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
- | 0.165 | 0.6 | 100 | 0.3352 | -1.8457 | -5.5205 | 0.8729 | 3.6748 | -1564.3583 | -751.3719 | -2.9460 | -3.1365 |
66
 
67
 
68
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Logits/chosen: -3.1281
21
+ - Logits/rejected: -2.9646
22
+ - Logps/chosen: -755.8733
23
+ - Logps/rejected: -1595.0167
24
+ - Loss: 0.3332
25
+ - Rewards/accuracies: 0.8686
26
+ - Rewards/chosen: -1.8907
27
+ - Rewards/margins: 3.9363
28
+ - Rewards/rejected: -5.8271
29
 
30
  ## Model description
31
 
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
64
+ |:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
65
+ | 0.1684 | 0.6 | 100 | -3.1281 | -2.9646 | -755.8733 | -1595.0167 | 0.3332 | 0.8686 | -1.8907 | 3.9363 | -5.8271 |
66
 
67
 
68
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.2817753102807772,
4
- "train_runtime": 2916.9515,
5
  "train_samples": 10744,
6
- "train_samples_per_second": 3.683,
7
- "train_steps_per_second": 0.058
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.05682134947606495,
4
+ "train_runtime": 921.1998,
5
  "train_samples": 10744,
6
+ "train_samples_per_second": 11.663,
7
+ "train_steps_per_second": 0.182
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e54965f7b11a7a21455bc4c5b01631545242d6b7d21a1914d1abac427dc7d53f
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe0127f15a4461d6a7c28b08e611bd232f2d4bca94dce568ac53e325cd40fea2
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a20a85a91d65ef47d88b7a38be3f15d950615e532666830e610a7e45fa9aa1f4
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd4000af7519e59aed8955cdef8a0605c985111576d9adab856d7fab36509a6
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37aaaaca7d689f3c0c042b750c9bde846e812bf6610ca9ce994adc74b2b2c302
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:633705aecaeaa274c0c1294d292969ed4084b3468ed9aa8295673918e39a7d31
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.2817753102807772,
4
- "train_runtime": 2916.9515,
5
  "train_samples": 10744,
6
- "train_samples_per_second": 3.683,
7
- "train_steps_per_second": 0.058
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.05682134947606495,
4
+ "train_runtime": 921.1998,
5
  "train_samples": 10744,
6
+ "train_samples_per_second": 11.663,
7
+ "train_steps_per_second": 0.182
8
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
- "grad_norm": 17.799428866198873,
14
  "learning_rate": 2.941176470588235e-08,
15
  "logits/chosen": -3.0399391651153564,
16
  "logits/rejected": -2.5624823570251465,
@@ -25,268 +25,268 @@
25
  },
26
  {
27
  "epoch": 0.06,
28
- "grad_norm": 18.84470879916231,
29
  "learning_rate": 2.941176470588235e-07,
30
- "logits/chosen": -2.7761378288269043,
31
- "logits/rejected": -2.7907724380493164,
32
- "logps/chosen": -503.99615478515625,
33
  "logps/rejected": -1057.63232421875,
34
- "loss": 0.6925,
35
- "rewards/accuracies": 0.4930555522441864,
36
- "rewards/chosen": 0.0016675572842359543,
37
- "rewards/margins": 0.0015082131139934063,
38
- "rewards/rejected": 0.00015934415569063276,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.12,
43
- "grad_norm": 20.728431631666236,
44
  "learning_rate": 4.995131923687487e-07,
45
- "logits/chosen": -2.7420125007629395,
46
- "logits/rejected": -2.6966209411621094,
47
- "logps/chosen": -548.6405029296875,
48
- "logps/rejected": -999.8572998046875,
49
- "loss": 0.6742,
50
- "rewards/accuracies": 0.7749999761581421,
51
- "rewards/chosen": 0.03226347267627716,
52
- "rewards/margins": 0.042478956282138824,
53
- "rewards/rejected": -0.010215478017926216,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.18,
58
- "grad_norm": 24.840501208406245,
59
  "learning_rate": 4.909114739839079e-07,
60
- "logits/chosen": -2.9764866828918457,
61
- "logits/rejected": -2.835902690887451,
62
- "logps/chosen": -549.3971557617188,
63
- "logps/rejected": -1060.723876953125,
64
  "loss": 0.6134,
65
  "rewards/accuracies": 0.824999988079071,
66
- "rewards/chosen": 0.06945657730102539,
67
- "rewards/margins": 0.18815770745277405,
68
- "rewards/rejected": -0.11870112270116806,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.24,
73
- "grad_norm": 28.735895531088158,
74
  "learning_rate": 4.719192614212969e-07,
75
- "logits/chosen": -3.0226290225982666,
76
- "logits/rejected": -2.992037296295166,
77
- "logps/chosen": -569.0553588867188,
78
- "logps/rejected": -1104.0389404296875,
79
- "loss": 0.4727,
80
  "rewards/accuracies": 0.856249988079071,
81
- "rewards/chosen": -0.17996565997600555,
82
- "rewards/margins": 0.7053974866867065,
83
- "rewards/rejected": -0.8853631019592285,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.3,
88
- "grad_norm": 34.5936973521811,
89
  "learning_rate": 4.4335568741374695e-07,
90
- "logits/chosen": -3.085718870162964,
91
- "logits/rejected": -3.139434814453125,
92
- "logps/chosen": -724.1493530273438,
93
- "logps/rejected": -1332.0565185546875,
94
- "loss": 0.3358,
95
- "rewards/accuracies": 0.8187500238418579,
96
- "rewards/chosen": -1.0768327713012695,
97
- "rewards/margins": 1.9719797372817993,
98
- "rewards/rejected": -3.0488123893737793,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.36,
103
- "grad_norm": 75.01680127687075,
104
  "learning_rate": 4.0645269681018434e-07,
105
- "logits/chosen": -3.012761354446411,
106
- "logits/rejected": -3.0863022804260254,
107
- "logps/chosen": -767.4573364257812,
108
- "logps/rejected": -1472.573486328125,
109
- "loss": 0.2652,
110
  "rewards/accuracies": 0.893750011920929,
111
- "rewards/chosen": -1.2531887292861938,
112
- "rewards/margins": 3.80780291557312,
113
- "rewards/rejected": -5.060992240905762,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.42,
118
- "grad_norm": 26.06996725714517,
119
  "learning_rate": 3.6280191288478435e-07,
120
- "logits/chosen": -2.997857093811035,
121
- "logits/rejected": -3.0774106979370117,
122
- "logps/chosen": -651.4761962890625,
123
- "logps/rejected": -1660.883056640625,
124
- "loss": 0.2146,
125
  "rewards/accuracies": 0.918749988079071,
126
- "rewards/chosen": -0.9511808156967163,
127
- "rewards/margins": 4.4248552322387695,
128
- "rewards/rejected": -5.376035690307617,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 0.48,
133
- "grad_norm": 19.983426827432616,
134
  "learning_rate": 3.142859907420615e-07,
135
- "logits/chosen": -3.0228724479675293,
136
- "logits/rejected": -3.0555529594421387,
137
- "logps/chosen": -668.3040771484375,
138
- "logps/rejected": -1470.601806640625,
139
- "loss": 0.1981,
140
  "rewards/accuracies": 0.893750011920929,
141
- "rewards/chosen": -1.036330223083496,
142
- "rewards/margins": 4.10854959487915,
143
- "rewards/rejected": -5.1448798179626465,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 0.54,
148
- "grad_norm": 48.7371474187784,
149
  "learning_rate": 2.629974185404951e-07,
150
- "logits/chosen": -2.960538387298584,
151
- "logits/rejected": -3.0230116844177246,
152
- "logps/chosen": -626.430908203125,
153
- "logps/rejected": -1550.2908935546875,
154
- "loss": 0.1793,
155
  "rewards/accuracies": 0.893750011920929,
156
- "rewards/chosen": -1.4039939641952515,
157
- "rewards/margins": 4.859676361083984,
158
- "rewards/rejected": -6.263670921325684,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 0.6,
163
- "grad_norm": 35.72914831932015,
164
  "learning_rate": 2.1114826863194878e-07,
165
- "logits/chosen": -2.9680638313293457,
166
- "logits/rejected": -3.0059874057769775,
167
- "logps/chosen": -673.1527099609375,
168
- "logps/rejected": -1737.0718994140625,
169
- "loss": 0.165,
170
  "rewards/accuracies": 0.9437500238418579,
171
- "rewards/chosen": -1.3123265504837036,
172
- "rewards/margins": 5.412586212158203,
173
- "rewards/rejected": -6.724913120269775,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 0.6,
178
- "eval_logits/chosen": -3.136507034301758,
179
- "eval_logits/rejected": -2.9459588527679443,
180
- "eval_logps/chosen": -751.3718872070312,
181
- "eval_logps/rejected": -1564.3582763671875,
182
- "eval_loss": 0.33522769808769226,
183
- "eval_rewards/accuracies": 0.8728813529014587,
184
- "eval_rewards/chosen": -1.8457263708114624,
185
- "eval_rewards/margins": 3.6747524738311768,
186
- "eval_rewards/rejected": -5.520478248596191,
187
- "eval_runtime": 229.3003,
188
- "eval_samples_per_second": 8.147,
189
- "eval_steps_per_second": 0.257,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 0.65,
194
- "grad_norm": 35.289047373056896,
195
  "learning_rate": 1.6097479104361326e-07,
196
- "logits/chosen": -2.9342598915100098,
197
- "logits/rejected": -2.9889469146728516,
198
- "logps/chosen": -731.7562255859375,
199
- "logps/rejected": -1630.530517578125,
200
- "loss": 0.1931,
201
- "rewards/accuracies": 0.90625,
202
- "rewards/chosen": -1.4041537046432495,
203
- "rewards/margins": 5.037538051605225,
204
- "rewards/rejected": -6.4416913986206055,
205
  "step": 110
206
  },
207
  {
208
  "epoch": 0.71,
209
- "grad_norm": 22.970974992307827,
210
  "learning_rate": 1.146409641785882e-07,
211
- "logits/chosen": -2.8369693756103516,
212
- "logits/rejected": -2.999558925628662,
213
- "logps/chosen": -675.0198364257812,
214
- "logps/rejected": -1771.7913818359375,
215
- "loss": 0.1462,
216
- "rewards/accuracies": 0.925000011920929,
217
- "rewards/chosen": -1.2913109064102173,
218
- "rewards/margins": 5.878513336181641,
219
- "rewards/rejected": -7.16982364654541,
220
  "step": 120
221
  },
222
  {
223
  "epoch": 0.77,
224
- "grad_norm": 19.730658509992367,
225
  "learning_rate": 7.414516258630244e-08,
226
- "logits/chosen": -2.9381890296936035,
227
- "logits/rejected": -2.9967472553253174,
228
- "logps/chosen": -715.4029541015625,
229
- "logps/rejected": -1808.675048828125,
230
- "loss": 0.1393,
231
  "rewards/accuracies": 0.949999988079071,
232
- "rewards/chosen": -1.4525569677352905,
233
- "rewards/margins": 5.484087944030762,
234
- "rewards/rejected": -6.936644554138184,
235
  "step": 130
236
  },
237
  {
238
  "epoch": 0.83,
239
- "grad_norm": 18.529137261894174,
240
  "learning_rate": 4.1233967214979764e-08,
241
- "logits/chosen": -2.980013370513916,
242
- "logits/rejected": -2.982477903366089,
243
- "logps/chosen": -672.929443359375,
244
- "logps/rejected": -1697.8675537109375,
245
- "loss": 0.1436,
246
- "rewards/accuracies": 0.9125000238418579,
247
- "rewards/chosen": -1.4015110731124878,
248
- "rewards/margins": 5.538142681121826,
249
- "rewards/rejected": -6.9396538734436035,
250
  "step": 140
251
  },
252
  {
253
  "epoch": 0.89,
254
- "grad_norm": 31.45814992703821,
255
  "learning_rate": 1.732683550362954e-08,
256
- "logits/chosen": -2.9663195610046387,
257
- "logits/rejected": -3.070772409439087,
258
- "logps/chosen": -677.1956787109375,
259
- "logps/rejected": -2003.745361328125,
260
- "loss": 0.0987,
261
  "rewards/accuracies": 0.9624999761581421,
262
- "rewards/chosen": -1.4282945394515991,
263
- "rewards/margins": 7.419705867767334,
264
- "rewards/rejected": -8.848000526428223,
265
  "step": 150
266
  },
267
  {
268
  "epoch": 0.95,
269
- "grad_norm": 35.76052098122492,
270
  "learning_rate": 3.4548802869627804e-09,
271
- "logits/chosen": -2.954775810241699,
272
- "logits/rejected": -2.984683036804199,
273
- "logps/chosen": -716.851806640625,
274
- "logps/rejected": -1857.295654296875,
275
- "loss": 0.1224,
276
- "rewards/accuracies": 0.918749988079071,
277
- "rewards/chosen": -1.7806593179702759,
278
- "rewards/margins": 6.663559913635254,
279
- "rewards/rejected": -8.444218635559082,
280
  "step": 160
281
  },
282
  {
283
  "epoch": 1.0,
284
  "step": 168,
285
  "total_flos": 0.0,
286
- "train_loss": 0.2817753102807772,
287
- "train_runtime": 2916.9515,
288
- "train_samples_per_second": 3.683,
289
- "train_steps_per_second": 0.058
290
  }
291
  ],
292
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
+ "grad_norm": 17.800336779303485,
14
  "learning_rate": 2.941176470588235e-08,
15
  "logits/chosen": -3.0399391651153564,
16
  "logits/rejected": -2.5624823570251465,
 
25
  },
26
  {
27
  "epoch": 0.06,
28
+ "grad_norm": 18.838509896307443,
29
  "learning_rate": 2.941176470588235e-07,
30
+ "logits/chosen": -2.7760655879974365,
31
+ "logits/rejected": -2.790731906890869,
32
+ "logps/chosen": -503.9539489746094,
33
  "logps/rejected": -1057.63232421875,
34
+ "loss": 0.6926,
35
+ "rewards/accuracies": 0.5763888955116272,
36
+ "rewards/chosen": 0.0020897421054542065,
37
+ "rewards/margins": 0.001932685961946845,
38
+ "rewards/rejected": 0.0001570563472341746,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.12,
43
+ "grad_norm": 20.774413071968993,
44
  "learning_rate": 4.995131923687487e-07,
45
+ "logits/chosen": -2.7422165870666504,
46
+ "logits/rejected": -2.696645498275757,
47
+ "logps/chosen": -548.654296875,
48
+ "logps/rejected": -999.8585815429688,
49
+ "loss": 0.6741,
50
+ "rewards/accuracies": 0.762499988079071,
51
+ "rewards/chosen": 0.03212609142065048,
52
+ "rewards/margins": 0.042353663593530655,
53
+ "rewards/rejected": -0.010227566584944725,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.18,
58
+ "grad_norm": 24.957524647561584,
59
  "learning_rate": 4.909114739839079e-07,
60
+ "logits/chosen": -2.9761385917663574,
61
+ "logits/rejected": -2.836198329925537,
62
+ "logps/chosen": -549.5018310546875,
63
+ "logps/rejected": -1060.82763671875,
64
  "loss": 0.6134,
65
  "rewards/accuracies": 0.824999988079071,
66
+ "rewards/chosen": 0.06840862333774567,
67
+ "rewards/margins": 0.18814805150032043,
68
+ "rewards/rejected": -0.11973947286605835,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.24,
73
+ "grad_norm": 29.2493335087329,
74
  "learning_rate": 4.719192614212969e-07,
75
+ "logits/chosen": -3.023461103439331,
76
+ "logits/rejected": -2.9939627647399902,
77
+ "logps/chosen": -569.5960083007812,
78
+ "logps/rejected": -1105.576416015625,
79
+ "loss": 0.4716,
80
  "rewards/accuracies": 0.856249988079071,
81
+ "rewards/chosen": -0.18537160754203796,
82
+ "rewards/margins": 0.7153643369674683,
83
+ "rewards/rejected": -0.9007358551025391,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.3,
88
+ "grad_norm": 34.52922011163671,
89
  "learning_rate": 4.4335568741374695e-07,
90
+ "logits/chosen": -3.0847573280334473,
91
+ "logits/rejected": -3.1392886638641357,
92
+ "logps/chosen": -724.968505859375,
93
+ "logps/rejected": -1334.053955078125,
94
+ "loss": 0.3356,
95
+ "rewards/accuracies": 0.8125,
96
+ "rewards/chosen": -1.0850236415863037,
97
+ "rewards/margins": 1.9837610721588135,
98
+ "rewards/rejected": -3.068784713745117,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.36,
103
+ "grad_norm": 74.1970434298791,
104
  "learning_rate": 4.0645269681018434e-07,
105
+ "logits/chosen": -3.01001238822937,
106
+ "logits/rejected": -3.0855369567871094,
107
+ "logps/chosen": -763.7335815429688,
108
+ "logps/rejected": -1466.1148681640625,
109
+ "loss": 0.2659,
110
  "rewards/accuracies": 0.893750011920929,
111
+ "rewards/chosen": -1.215951681137085,
112
+ "rewards/margins": 3.7804553508758545,
113
+ "rewards/rejected": -4.996407508850098,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.42,
118
+ "grad_norm": 24.166083003223434,
119
  "learning_rate": 3.6280191288478435e-07,
120
+ "logits/chosen": -2.9843807220458984,
121
+ "logits/rejected": -3.0654196739196777,
122
+ "logps/chosen": -652.9537353515625,
123
+ "logps/rejected": -1663.2164306640625,
124
+ "loss": 0.2156,
125
  "rewards/accuracies": 0.918749988079071,
126
+ "rewards/chosen": -0.9659550786018372,
127
+ "rewards/margins": 4.433414459228516,
128
+ "rewards/rejected": -5.399369716644287,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 0.48,
133
+ "grad_norm": 17.793411473003722,
134
  "learning_rate": 3.142859907420615e-07,
135
+ "logits/chosen": -3.0192360877990723,
136
+ "logits/rejected": -3.050020217895508,
137
+ "logps/chosen": -661.2525634765625,
138
+ "logps/rejected": -1447.4609375,
139
+ "loss": 0.1989,
140
  "rewards/accuracies": 0.893750011920929,
141
+ "rewards/chosen": -0.9658153653144836,
142
+ "rewards/margins": 3.9476540088653564,
143
+ "rewards/rejected": -4.913470268249512,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 0.54,
148
+ "grad_norm": 55.47746105929639,
149
  "learning_rate": 2.629974185404951e-07,
150
+ "logits/chosen": -2.978877305984497,
151
+ "logits/rejected": -3.029723644256592,
152
+ "logps/chosen": -613.0646362304688,
153
+ "logps/rejected": -1531.10546875,
154
+ "loss": 0.1844,
155
  "rewards/accuracies": 0.893750011920929,
156
+ "rewards/chosen": -1.270330786705017,
157
+ "rewards/margins": 4.801483154296875,
158
+ "rewards/rejected": -6.071813583374023,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 0.6,
163
+ "grad_norm": 33.71679055058231,
164
  "learning_rate": 2.1114826863194878e-07,
165
+ "logits/chosen": -2.983506441116333,
166
+ "logits/rejected": -3.02791690826416,
167
+ "logps/chosen": -676.8287353515625,
168
+ "logps/rejected": -1784.007568359375,
169
+ "loss": 0.1684,
170
  "rewards/accuracies": 0.9437500238418579,
171
+ "rewards/chosen": -1.3490874767303467,
172
+ "rewards/margins": 5.845183849334717,
173
+ "rewards/rejected": -7.194271087646484,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 0.6,
178
+ "eval_logits/chosen": -3.1281025409698486,
179
+ "eval_logits/rejected": -2.964585304260254,
180
+ "eval_logps/chosen": -755.873291015625,
181
+ "eval_logps/rejected": -1595.0167236328125,
182
+ "eval_loss": 0.3332486152648926,
183
+ "eval_rewards/accuracies": 0.8686440587043762,
184
+ "eval_rewards/chosen": -1.8907400369644165,
185
+ "eval_rewards/margins": 3.9363222122192383,
186
+ "eval_rewards/rejected": -5.827062606811523,
187
+ "eval_runtime": 195.0133,
188
+ "eval_samples_per_second": 9.579,
189
+ "eval_steps_per_second": 0.303,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 0.65,
194
+ "grad_norm": 36.49070034902309,
195
  "learning_rate": 1.6097479104361326e-07,
196
+ "logits/chosen": -2.9343550205230713,
197
+ "logits/rejected": -3.0102057456970215,
198
+ "logps/chosen": -727.3587646484375,
199
+ "logps/rejected": -1638.8382568359375,
200
+ "loss": 0.1954,
201
+ "rewards/accuracies": 0.8999999761581421,
202
+ "rewards/chosen": -1.3601789474487305,
203
+ "rewards/margins": 5.164590835571289,
204
+ "rewards/rejected": -6.524770259857178,
205
  "step": 110
206
  },
207
  {
208
  "epoch": 0.71,
209
+ "grad_norm": 23.81218846817336,
210
  "learning_rate": 1.146409641785882e-07,
211
+ "logits/chosen": -2.8368687629699707,
212
+ "logits/rejected": -3.0225136280059814,
213
+ "logps/chosen": -666.7093505859375,
214
+ "logps/rejected": -1738.1021728515625,
215
+ "loss": 0.1519,
216
+ "rewards/accuracies": 0.9312499761581421,
217
+ "rewards/chosen": -1.2082051038742065,
218
+ "rewards/margins": 5.624727249145508,
219
+ "rewards/rejected": -6.832932472229004,
220
  "step": 120
221
  },
222
  {
223
  "epoch": 0.77,
224
+ "grad_norm": 20.21152127941066,
225
  "learning_rate": 7.414516258630244e-08,
226
+ "logits/chosen": -2.9129068851470947,
227
+ "logits/rejected": -3.0287842750549316,
228
+ "logps/chosen": -708.4816284179688,
229
+ "logps/rejected": -1789.521728515625,
230
+ "loss": 0.1424,
231
  "rewards/accuracies": 0.949999988079071,
232
+ "rewards/chosen": -1.3833436965942383,
233
+ "rewards/margins": 5.361766338348389,
234
+ "rewards/rejected": -6.745110511779785,
235
  "step": 130
236
  },
237
  {
238
  "epoch": 0.83,
239
+ "grad_norm": 19.81204521774453,
240
  "learning_rate": 4.1233967214979764e-08,
241
+ "logits/chosen": -2.979017734527588,
242
+ "logits/rejected": -3.0025482177734375,
243
+ "logps/chosen": -664.9674072265625,
244
+ "logps/rejected": -1679.622802734375,
245
+ "loss": 0.1512,
246
+ "rewards/accuracies": 0.90625,
247
+ "rewards/chosen": -1.3218904733657837,
248
+ "rewards/margins": 5.435315132141113,
249
+ "rewards/rejected": -6.757205963134766,
250
  "step": 140
251
  },
252
  {
253
  "epoch": 0.89,
254
+ "grad_norm": 33.74454128402628,
255
  "learning_rate": 1.732683550362954e-08,
256
+ "logits/chosen": -2.971137523651123,
257
+ "logits/rejected": -3.0957703590393066,
258
+ "logps/chosen": -662.1959228515625,
259
+ "logps/rejected": -1967.736083984375,
260
+ "loss": 0.1032,
261
  "rewards/accuracies": 0.9624999761581421,
262
+ "rewards/chosen": -1.2782970666885376,
263
+ "rewards/margins": 7.209610939025879,
264
+ "rewards/rejected": -8.487907409667969,
265
  "step": 150
266
  },
267
  {
268
  "epoch": 0.95,
269
+ "grad_norm": 35.57488951709773,
270
  "learning_rate": 3.4548802869627804e-09,
271
+ "logits/chosen": -2.9537863731384277,
272
+ "logits/rejected": -3.0035552978515625,
273
+ "logps/chosen": -699.1692504882812,
274
+ "logps/rejected": -1824.681884765625,
275
+ "loss": 0.1233,
276
+ "rewards/accuracies": 0.9312499761581421,
277
+ "rewards/chosen": -1.6038345098495483,
278
+ "rewards/margins": 6.514244079589844,
279
+ "rewards/rejected": -8.118078231811523,
280
  "step": 160
281
  },
282
  {
283
  "epoch": 1.0,
284
  "step": 168,
285
  "total_flos": 0.0,
286
+ "train_loss": 0.05682134947606495,
287
+ "train_runtime": 921.1998,
288
+ "train_samples_per_second": 11.663,
289
+ "train_steps_per_second": 0.182
290
  }
291
  ],
292
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b09216a460e233950cd867da55d3c38f67a050a1cae634495d856cb2637f3be1
3
  size 6264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aeb8f297f211597416ec319893fcc5d1625d3b2d5ea38c7e95ed7d3b16cbf25
3
  size 6264