wzhouad commited on
Commit
cf5bade
1 Parent(s): b627bc0

Model save

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.34605644783883727,
4
- "train_runtime": 6207.5812,
5
  "train_samples": 61134,
6
- "train_samples_per_second": 9.848,
7
- "train_steps_per_second": 0.077
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.37614710375947774,
4
+ "train_runtime": 6361.5592,
5
  "train_samples": 61134,
6
+ "train_samples_per_second": 9.61,
7
+ "train_steps_per_second": 0.075
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b48c4d4938d4c04a59f63ceb99963905c7f8b51613ab1c185e59808413f94a24
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8558783426207d481e1b212901b2a745cf326f53087e609ed23dbb2013f59ce2
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fd34b5e33561f0c1c4e8aba96c533e93794f3b9c20d5d385d7ab01a800026e9
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0de16d834b47e78126680587374bfb8e3f6fff6da5595e08d8bc8c4dfe3f057e
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9471f53484869323cab64ce1a140ea2e9812ea57df01261f12da0a66c195e23f
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd358506aa4a2ba7d2848486443d68ed78736482c3641750882e51a5423e6c7e
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:955866100db63c46b619b676098124e61b58bb80aedf8c78a341cf53f7450c9d
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee534a0b473037a903d7605adf23f1bc01159bba79b0d8f59245eebbe979737
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.34605644783883727,
4
- "train_runtime": 6207.5812,
5
  "train_samples": 61134,
6
- "train_samples_per_second": 9.848,
7
- "train_steps_per_second": 0.077
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.37614710375947774,
4
+ "train_runtime": 6361.5592,
5
  "train_samples": 61134,
6
+ "train_samples_per_second": 9.61,
7
+ "train_steps_per_second": 0.075
8
  }
trainer_state.json CHANGED
@@ -11,669 +11,669 @@
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 2.0833333333333333e-07,
14
- "logits/chosen": 0.17659232020378113,
15
- "logits/rejected": 0.25393185019493103,
16
- "logps/chosen": -354.3299865722656,
17
- "logps/rejected": -305.2392883300781,
18
- "loss": 0.4997,
19
- "rewards/accuracies": 0.4312500059604645,
20
- "rewards/chosen": 0.007027293089777231,
21
- "rewards/margins": 0.009162568487226963,
22
- "rewards/rejected": -0.002135276095941663,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.04,
27
  "learning_rate": 4.1666666666666667e-07,
28
- "logits/chosen": 0.0710873156785965,
29
- "logits/rejected": 0.19884119927883148,
30
- "logps/chosen": -316.61993408203125,
31
- "logps/rejected": -276.21624755859375,
32
- "loss": 0.4974,
33
- "rewards/accuracies": 0.6187499761581421,
34
- "rewards/chosen": 0.011533690616488457,
35
- "rewards/margins": 0.02961091324687004,
36
- "rewards/rejected": -0.018077218905091286,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.06,
41
  "learning_rate": 6.249999999999999e-07,
42
- "logits/chosen": 0.18181222677230835,
43
- "logits/rejected": 0.2527164816856384,
44
- "logps/chosen": -294.2743225097656,
45
- "logps/rejected": -298.42523193359375,
46
- "loss": 0.4791,
47
- "rewards/accuracies": 0.675000011920929,
48
- "rewards/chosen": 0.06925608962774277,
49
- "rewards/margins": 0.0819806382060051,
50
- "rewards/rejected": -0.012724560685455799,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.08,
55
  "learning_rate": 8.333333333333333e-07,
56
- "logits/chosen": 0.11502287536859512,
57
- "logits/rejected": 0.23403987288475037,
58
- "logps/chosen": -343.6348571777344,
59
- "logps/rejected": -318.79791259765625,
60
- "loss": 0.4533,
61
- "rewards/accuracies": 0.606249988079071,
62
- "rewards/chosen": 0.3937355875968933,
63
- "rewards/margins": 0.22681434452533722,
64
- "rewards/rejected": 0.16692125797271729,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 9.999463737538052e-07,
70
- "logits/chosen": 0.17206831276416779,
71
- "logits/rejected": 0.2584429085254669,
72
- "logps/chosen": -306.1137390136719,
73
- "logps/rejected": -285.19561767578125,
74
- "loss": 0.4228,
75
- "rewards/accuracies": 0.675000011920929,
76
- "rewards/chosen": 0.4438863694667816,
77
- "rewards/margins": 0.5952231287956238,
78
- "rewards/rejected": -0.15133680403232574,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.13,
83
  "learning_rate": 9.980706626858607e-07,
84
- "logits/chosen": 0.1263621747493744,
85
- "logits/rejected": 0.2632313370704651,
86
- "logps/chosen": -289.64898681640625,
87
- "logps/rejected": -279.08453369140625,
88
- "loss": 0.4049,
89
- "rewards/accuracies": 0.6937500238418579,
90
- "rewards/chosen": 0.322978675365448,
91
- "rewards/margins": 0.6868947744369507,
92
- "rewards/rejected": -0.3639160990715027,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 9.935251313189563e-07,
98
- "logits/chosen": 0.11648492515087128,
99
- "logits/rejected": 0.19881902635097504,
100
- "logps/chosen": -322.6625671386719,
101
- "logps/rejected": -320.7023620605469,
102
- "loss": 0.3918,
103
- "rewards/accuracies": 0.612500011920929,
104
- "rewards/chosen": 0.7061153650283813,
105
- "rewards/margins": 0.8815712928771973,
106
- "rewards/rejected": -0.1754559576511383,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 9.86334145175542e-07,
112
- "logits/chosen": 0.1569955050945282,
113
- "logits/rejected": 0.25127941370010376,
114
- "logps/chosen": -308.86077880859375,
115
- "logps/rejected": -291.51300048828125,
116
- "loss": 0.3825,
117
  "rewards/accuracies": 0.75,
118
- "rewards/chosen": 0.9802907109260559,
119
- "rewards/margins": 1.4248442649841309,
120
- "rewards/rejected": -0.4445534348487854,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.19,
125
  "learning_rate": 9.765362502737097e-07,
126
- "logits/chosen": 0.01096972357481718,
127
- "logits/rejected": 0.15417027473449707,
128
- "logps/chosen": -325.28692626953125,
129
- "logps/rejected": -269.4154968261719,
130
- "loss": 0.3708,
131
- "rewards/accuracies": 0.706250011920929,
132
- "rewards/chosen": 1.165459394454956,
133
- "rewards/margins": 1.3995535373687744,
134
- "rewards/rejected": -0.2340943068265915,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.21,
139
  "learning_rate": 9.641839665080363e-07,
140
- "logits/chosen": 0.08043601363897324,
141
- "logits/rejected": 0.2466708868741989,
142
- "logps/chosen": -321.926513671875,
143
- "logps/rejected": -292.6954345703125,
144
- "loss": 0.3623,
145
- "rewards/accuracies": 0.643750011920929,
146
- "rewards/chosen": 1.2634952068328857,
147
- "rewards/margins": 1.294826626777649,
148
- "rewards/rejected": -0.03133126348257065,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.23,
153
  "learning_rate": 9.493435061259129e-07,
154
- "logits/chosen": 0.09306775033473969,
155
- "logits/rejected": 0.11595858633518219,
156
- "logps/chosen": -301.3399963378906,
157
- "logps/rejected": -301.2839660644531,
158
- "loss": 0.3404,
159
- "rewards/accuracies": 0.675000011920929,
160
- "rewards/chosen": 1.1279371976852417,
161
- "rewards/margins": 1.3113162517547607,
162
- "rewards/rejected": -0.18337900936603546,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.25,
167
  "learning_rate": 9.320944188084241e-07,
168
- "logits/chosen": 0.05181150510907173,
169
- "logits/rejected": 0.10330178588628769,
170
- "logps/chosen": -308.14410400390625,
171
- "logps/rejected": -322.2857360839844,
172
- "loss": 0.3632,
173
- "rewards/accuracies": 0.637499988079071,
174
- "rewards/chosen": 1.2409526109695435,
175
- "rewards/margins": 1.3498892784118652,
176
- "rewards/rejected": -0.10893689095973969,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.27,
181
  "learning_rate": 9.125291652582547e-07,
182
- "logits/chosen": 0.020175116136670113,
183
- "logits/rejected": 0.16819754242897034,
184
- "logps/chosen": -324.732177734375,
185
- "logps/rejected": -285.5627136230469,
186
- "loss": 0.3408,
187
- "rewards/accuracies": 0.6812499761581421,
188
- "rewards/chosen": 1.376392126083374,
189
- "rewards/margins": 1.4476263523101807,
190
- "rewards/rejected": -0.07123424857854843,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.29,
195
  "learning_rate": 8.90752621580335e-07,
196
- "logits/chosen": -0.007861034944653511,
197
- "logits/rejected": 0.14148305356502533,
198
- "logps/chosen": -322.4604797363281,
199
- "logps/rejected": -273.60321044921875,
200
- "loss": 0.3284,
201
- "rewards/accuracies": 0.6937500238418579,
202
- "rewards/chosen": 0.9582949876785278,
203
- "rewards/margins": 1.3279260396957397,
204
- "rewards/rejected": -0.36963123083114624,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.31,
209
  "learning_rate": 8.668815171119019e-07,
210
- "logits/chosen": 0.07163457572460175,
211
- "logits/rejected": 0.11265318095684052,
212
- "logps/chosen": -318.3633117675781,
213
- "logps/rejected": -314.35321044921875,
214
- "loss": 0.3452,
215
- "rewards/accuracies": 0.706250011920929,
216
- "rewards/chosen": 1.6937291622161865,
217
- "rewards/margins": 1.6340910196304321,
218
- "rewards/rejected": 0.05963808298110962,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.33,
223
  "learning_rate": 8.410438087153911e-07,
224
- "logits/chosen": 0.04666835069656372,
225
- "logits/rejected": 0.09292508661746979,
226
- "logps/chosen": -283.54595947265625,
227
- "logps/rejected": -278.94769287109375,
228
- "loss": 0.3399,
229
- "rewards/accuracies": 0.7562500238418579,
230
- "rewards/chosen": 1.8515584468841553,
231
- "rewards/margins": 1.676443099975586,
232
- "rewards/rejected": 0.17511534690856934,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.36,
237
  "learning_rate": 8.133779948881513e-07,
238
- "logits/chosen": 0.017909998074173927,
239
- "logits/rejected": 0.0967395007610321,
240
- "logps/chosen": -319.28668212890625,
241
- "logps/rejected": -307.777099609375,
242
- "loss": 0.3344,
243
- "rewards/accuracies": 0.7124999761581421,
244
- "rewards/chosen": 1.7081670761108398,
245
- "rewards/margins": 1.7009460926055908,
246
- "rewards/rejected": 0.007220864295959473,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.38,
251
  "learning_rate": 7.840323733655778e-07,
252
- "logits/chosen": -0.01807587407529354,
253
- "logits/rejected": 0.12429861724376678,
254
- "logps/chosen": -308.74945068359375,
255
- "logps/rejected": -301.0330505371094,
256
- "loss": 0.3297,
257
- "rewards/accuracies": 0.731249988079071,
258
- "rewards/chosen": 1.7282564640045166,
259
- "rewards/margins": 1.8440380096435547,
260
- "rewards/rejected": -0.1157817393541336,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.4,
265
  "learning_rate": 7.531642461971514e-07,
266
- "logits/chosen": 0.05006791278719902,
267
- "logits/rejected": 0.12837380170822144,
268
- "logps/chosen": -318.7227478027344,
269
- "logps/rejected": -313.162841796875,
270
- "loss": 0.3238,
271
- "rewards/accuracies": 0.699999988079071,
272
- "rewards/chosen": 1.5936723947525024,
273
- "rewards/margins": 1.6374588012695312,
274
- "rewards/rejected": -0.043786562979221344,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.42,
279
  "learning_rate": 7.209390765564318e-07,
280
- "logits/chosen": -0.004646389279514551,
281
- "logits/rejected": 0.1289207488298416,
282
- "logps/chosen": -325.53973388671875,
283
- "logps/rejected": -299.326904296875,
284
- "loss": 0.3379,
285
- "rewards/accuracies": 0.706250011920929,
286
- "rewards/chosen": 1.4950015544891357,
287
- "rewards/margins": 2.0150256156921387,
288
- "rewards/rejected": -0.5200243592262268,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.44,
293
  "learning_rate": 6.875296018047809e-07,
294
- "logits/chosen": 0.12037472426891327,
295
- "logits/rejected": 0.24673119187355042,
296
- "logps/chosen": -296.4547119140625,
297
- "logps/rejected": -276.714111328125,
298
- "loss": 0.337,
299
- "rewards/accuracies": 0.762499988079071,
300
- "rewards/chosen": 1.770256757736206,
301
- "rewards/margins": 2.454294204711914,
302
- "rewards/rejected": -0.6840375661849976,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.46,
307
  "learning_rate": 6.531149075630796e-07,
308
- "logits/chosen": -0.00517323287203908,
309
- "logits/rejected": 0.13203957676887512,
310
- "logps/chosen": -300.7497863769531,
311
- "logps/rejected": -265.4754333496094,
312
- "loss": 0.3394,
313
  "rewards/accuracies": 0.6937500238418579,
314
- "rewards/chosen": 1.5929324626922607,
315
- "rewards/margins": 1.7734311819076538,
316
- "rewards/rejected": -0.18049874901771545,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.48,
321
  "learning_rate": 6.178794677547137e-07,
322
- "logits/chosen": 0.0826948806643486,
323
- "logits/rejected": 0.13583150506019592,
324
- "logps/chosen": -309.74224853515625,
325
- "logps/rejected": -286.40533447265625,
326
- "loss": 0.328,
327
- "rewards/accuracies": 0.6937500238418579,
328
- "rewards/chosen": 1.6319090127944946,
329
- "rewards/margins": 1.9603748321533203,
330
- "rewards/rejected": -0.32846564054489136,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.5,
335
  "learning_rate": 5.820121557655108e-07,
336
- "logits/chosen": 0.04763277620077133,
337
- "logits/rejected": 0.10109977424144745,
338
- "logps/chosen": -296.3155822753906,
339
- "logps/rejected": -299.51873779296875,
340
- "loss": 0.3406,
341
- "rewards/accuracies": 0.6499999761581421,
342
- "rewards/chosen": 1.5636177062988281,
343
- "rewards/margins": 1.3247454166412354,
344
- "rewards/rejected": 0.23887233436107635,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.52,
349
  "learning_rate": 5.457052320211339e-07,
350
- "logits/chosen": 0.012852217070758343,
351
- "logits/rejected": 0.1158476248383522,
352
- "logps/chosen": -294.8091125488281,
353
- "logps/rejected": -282.0174865722656,
354
- "loss": 0.3307,
355
- "rewards/accuracies": 0.762499988079071,
356
- "rewards/chosen": 1.7239019870758057,
357
- "rewards/margins": 1.7212518453598022,
358
- "rewards/rejected": 0.002649706555530429,
359
  "step": 250
360
  },
361
  {
362
  "epoch": 0.54,
363
  "learning_rate": 5.091533134088387e-07,
364
- "logits/chosen": -0.014469897374510765,
365
- "logits/rejected": 0.07733233273029327,
366
- "logps/chosen": -338.17327880859375,
367
- "logps/rejected": -299.3669738769531,
368
- "loss": 0.3441,
369
- "rewards/accuracies": 0.65625,
370
- "rewards/chosen": 1.6470504999160767,
371
- "rewards/margins": 1.5775295495986938,
372
- "rewards/rejected": 0.06952105462551117,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 0.57,
377
  "learning_rate": 4.7255233006783624e-07,
378
- "logits/chosen": 0.07055391371250153,
379
- "logits/rejected": 0.12625916302204132,
380
- "logps/chosen": -308.22979736328125,
381
- "logps/rejected": -314.9990234375,
382
- "loss": 0.3276,
383
- "rewards/accuracies": 0.737500011920929,
384
- "rewards/chosen": 2.090404987335205,
385
- "rewards/margins": 2.5454375743865967,
386
- "rewards/rejected": -0.4550328850746155,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 0.59,
391
  "learning_rate": 4.3609847514019763e-07,
392
- "logits/chosen": 0.04763927310705185,
393
- "logits/rejected": 0.060921769589185715,
394
- "logps/chosen": -279.77642822265625,
395
- "logps/rejected": -291.3137512207031,
396
- "loss": 0.333,
397
- "rewards/accuracies": 0.65625,
398
- "rewards/chosen": 1.6706489324569702,
399
- "rewards/margins": 1.6953166723251343,
400
- "rewards/rejected": -0.02466759644448757,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 0.61,
405
  "learning_rate": 3.9998715311197783e-07,
406
- "logits/chosen": 0.009870557114481926,
407
- "logits/rejected": 0.0942167341709137,
408
- "logps/chosen": -330.2633972167969,
409
- "logps/rejected": -327.8630065917969,
410
- "loss": 0.324,
411
  "rewards/accuracies": 0.737500011920929,
412
- "rewards/chosen": 1.6167709827423096,
413
- "rewards/margins": 2.1237380504608154,
414
- "rewards/rejected": -0.5069671869277954,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 0.63,
419
  "learning_rate": 3.6441193238179146e-07,
420
- "logits/chosen": 0.07049299776554108,
421
- "logits/rejected": 0.08977767825126648,
422
- "logps/chosen": -274.5482177734375,
423
- "logps/rejected": -326.8363342285156,
424
- "loss": 0.3165,
425
- "rewards/accuracies": 0.731249988079071,
426
- "rewards/chosen": 1.7206777334213257,
427
- "rewards/margins": 2.070065498352051,
428
- "rewards/rejected": -0.34938788414001465,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 0.65,
433
  "learning_rate": 3.295635076714144e-07,
434
- "logits/chosen": 0.06275717914104462,
435
- "logits/rejected": 0.09255780279636383,
436
- "logps/chosen": -279.8996276855469,
437
- "logps/rejected": -316.30621337890625,
438
- "loss": 0.3135,
439
- "rewards/accuracies": 0.699999988079071,
440
- "rewards/chosen": 1.5832184553146362,
441
- "rewards/margins": 2.043025493621826,
442
- "rewards/rejected": -0.4598070979118347,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 0.67,
447
  "learning_rate": 2.956286778402226e-07,
448
- "logits/chosen": 0.009205429814755917,
449
- "logits/rejected": 0.15460598468780518,
450
- "logps/chosen": -296.5576171875,
451
- "logps/rejected": -295.835693359375,
452
- "loss": 0.3264,
453
  "rewards/accuracies": 0.7250000238418579,
454
- "rewards/chosen": 1.9073429107666016,
455
- "rewards/margins": 2.4707765579223633,
456
- "rewards/rejected": -0.5634336471557617,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 0.69,
461
  "learning_rate": 2.6278934458271996e-07,
462
- "logits/chosen": 0.058381110429763794,
463
- "logits/rejected": 0.13819275796413422,
464
- "logps/chosen": -281.2006530761719,
465
- "logps/rejected": -313.47174072265625,
466
- "loss": 0.3232,
467
- "rewards/accuracies": 0.6937500238418579,
468
- "rewards/chosen": 1.7567777633666992,
469
- "rewards/margins": 2.0288453102111816,
470
- "rewards/rejected": -0.27206745743751526,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 0.71,
475
  "learning_rate": 2.312215373764551e-07,
476
- "logits/chosen": 0.02380923368036747,
477
- "logits/rejected": 0.11691228300333023,
478
- "logps/chosen": -269.53045654296875,
479
- "logps/rejected": -264.8174133300781,
480
- "loss": 0.3176,
481
- "rewards/accuracies": 0.793749988079071,
482
- "rewards/chosen": 1.7166016101837158,
483
- "rewards/margins": 2.1150426864624023,
484
- "rewards/rejected": -0.39844104647636414,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 0.73,
489
  "learning_rate": 2.0109446990692963e-07,
490
- "logits/chosen": -0.0028537046164274216,
491
- "logits/rejected": 0.010644497349858284,
492
- "logps/chosen": -285.66375732421875,
493
- "logps/rejected": -311.220703125,
494
- "loss": 0.3286,
495
- "rewards/accuracies": 0.6625000238418579,
496
- "rewards/chosen": 1.583837628364563,
497
- "rewards/margins": 1.6484451293945312,
498
- "rewards/rejected": -0.06460770219564438,
499
  "step": 350
500
  },
501
  {
502
  "epoch": 0.75,
503
  "learning_rate": 1.725696330273575e-07,
504
- "logits/chosen": -0.04485073685646057,
505
- "logits/rejected": 0.07074587792158127,
506
- "logps/chosen": -308.7581787109375,
507
- "logps/rejected": -282.4355773925781,
508
- "loss": 0.3048,
509
- "rewards/accuracies": 0.7562500238418579,
510
- "rewards/chosen": 1.9399118423461914,
511
- "rewards/margins": 1.9651952981948853,
512
- "rewards/rejected": -0.025283390656113625,
513
  "step": 360
514
  },
515
  {
516
  "epoch": 0.77,
517
  "learning_rate": 1.4579992911531496e-07,
518
- "logits/chosen": -0.02086697518825531,
519
- "logits/rejected": 0.06267571449279785,
520
- "logps/chosen": -326.6328125,
521
- "logps/rejected": -318.2734375,
522
- "loss": 0.3048,
523
- "rewards/accuracies": 0.75,
524
- "rewards/chosen": 2.179868221282959,
525
- "rewards/margins": 2.945388078689575,
526
- "rewards/rejected": -0.7655196189880371,
527
  "step": 370
528
  },
529
  {
530
  "epoch": 0.8,
531
  "learning_rate": 1.209288524664029e-07,
532
- "logits/chosen": 0.030310412868857384,
533
- "logits/rejected": 0.11966564506292343,
534
- "logps/chosen": -277.7276916503906,
535
- "logps/rejected": -276.39739990234375,
536
- "loss": 0.303,
537
- "rewards/accuracies": 0.675000011920929,
538
- "rewards/chosen": 1.5345547199249268,
539
- "rewards/margins": 1.9558677673339844,
540
- "rewards/rejected": -0.4213130474090576,
541
  "step": 380
542
  },
543
  {
544
  "epoch": 0.82,
545
  "learning_rate": 9.808972011828054e-08,
546
- "logits/chosen": -0.06816152483224869,
547
- "logits/rejected": 0.05935809761285782,
548
- "logps/chosen": -320.16424560546875,
549
- "logps/rejected": -303.69097900390625,
550
- "loss": 0.31,
551
- "rewards/accuracies": 0.762499988079071,
552
- "rewards/chosen": 2.1527152061462402,
553
- "rewards/margins": 2.423252582550049,
554
- "rewards/rejected": -0.2705369293689728,
555
  "step": 390
556
  },
557
  {
558
  "epoch": 0.84,
559
  "learning_rate": 7.740495722810269e-08,
560
- "logits/chosen": 0.00943700410425663,
561
- "logits/rejected": 0.103798508644104,
562
- "logps/chosen": -298.0252685546875,
563
- "logps/rejected": -297.83843994140625,
564
- "loss": 0.3215,
565
- "rewards/accuracies": 0.7250000238418579,
566
- "rewards/chosen": 1.8979790210723877,
567
- "rewards/margins": 2.2030446529388428,
568
- "rewards/rejected": -0.30506545305252075,
569
  "step": 400
570
  },
571
  {
572
  "epoch": 0.86,
573
  "learning_rate": 5.898544083397e-08,
574
- "logits/chosen": -0.059524454176425934,
575
- "logits/rejected": 0.015174726955592632,
576
- "logps/chosen": -273.3992614746094,
577
- "logps/rejected": -292.0383605957031,
578
- "loss": 0.3228,
579
- "rewards/accuracies": 0.6499999761581421,
580
- "rewards/chosen": 1.579828143119812,
581
- "rewards/margins": 1.7233396768569946,
582
- "rewards/rejected": -0.14351139962673187,
583
  "step": 410
584
  },
585
  {
586
  "epoch": 0.88,
587
  "learning_rate": 4.292990551804171e-08,
588
- "logits/chosen": 0.0325622633099556,
589
- "logits/rejected": 0.11653436720371246,
590
- "logps/chosen": -273.19097900390625,
591
- "logps/rejected": -286.6775817871094,
592
- "loss": 0.3048,
593
- "rewards/accuracies": 0.75,
594
- "rewards/chosen": 2.0142855644226074,
595
- "rewards/margins": 2.1165807247161865,
596
- "rewards/rejected": -0.10229482501745224,
597
  "step": 420
598
  },
599
  {
600
  "epoch": 0.9,
601
  "learning_rate": 2.9324414157151367e-08,
602
- "logits/chosen": 0.028907526284456253,
603
- "logits/rejected": 0.059953343123197556,
604
- "logps/chosen": -314.8848876953125,
605
- "logps/rejected": -325.2813720703125,
606
- "loss": 0.3156,
607
- "rewards/accuracies": 0.699999988079071,
608
- "rewards/chosen": 1.919198989868164,
609
- "rewards/margins": 2.0691730976104736,
610
- "rewards/rejected": -0.14997398853302002,
611
  "step": 430
612
  },
613
  {
614
  "epoch": 0.92,
615
  "learning_rate": 1.824189659787284e-08,
616
- "logits/chosen": 0.022467201575636864,
617
- "logits/rejected": 0.049929820001125336,
618
- "logps/chosen": -268.8197937011719,
619
- "logps/rejected": -279.419921875,
620
- "loss": 0.3137,
621
- "rewards/accuracies": 0.65625,
622
- "rewards/chosen": 1.5737355947494507,
623
- "rewards/margins": 1.738011360168457,
624
- "rewards/rejected": -0.16427570581436157,
625
  "step": 440
626
  },
627
  {
628
  "epoch": 0.94,
629
  "learning_rate": 9.741758728888217e-09,
630
- "logits/chosen": -0.012446149252355099,
631
- "logits/rejected": 0.16082307696342468,
632
- "logps/chosen": -331.36688232421875,
633
- "logps/rejected": -287.3267822265625,
634
- "loss": 0.3116,
635
- "rewards/accuracies": 0.731249988079071,
636
- "rewards/chosen": 1.8584492206573486,
637
- "rewards/margins": 2.196075677871704,
638
- "rewards/rejected": -0.337626576423645,
639
  "step": 450
640
  },
641
  {
642
  "epoch": 0.96,
643
  "learning_rate": 3.869564046156459e-09,
644
- "logits/chosen": 0.03111204504966736,
645
- "logits/rejected": 0.199659526348114,
646
- "logps/chosen": -279.9968566894531,
647
- "logps/rejected": -270.5770568847656,
648
- "loss": 0.3212,
649
- "rewards/accuracies": 0.7562500238418579,
650
- "rewards/chosen": 1.7940731048583984,
651
- "rewards/margins": 2.2761783599853516,
652
- "rewards/rejected": -0.4821050763130188,
653
  "step": 460
654
  },
655
  {
656
  "epoch": 0.98,
657
  "learning_rate": 6.567894177967325e-10,
658
- "logits/chosen": -0.010620922781527042,
659
- "logits/rejected": 0.12738940119743347,
660
- "logps/chosen": -319.92535400390625,
661
- "logps/rejected": -303.04998779296875,
662
- "loss": 0.3148,
663
- "rewards/accuracies": 0.75,
664
- "rewards/chosen": 1.9223756790161133,
665
- "rewards/margins": 2.479106903076172,
666
- "rewards/rejected": -0.5567313432693481,
667
  "step": 470
668
  },
669
  {
670
  "epoch": 1.0,
671
  "step": 477,
672
  "total_flos": 0.0,
673
- "train_loss": 0.34605644783883727,
674
- "train_runtime": 6207.5812,
675
- "train_samples_per_second": 9.848,
676
- "train_steps_per_second": 0.077
677
  }
678
  ],
679
  "logging_steps": 10,
 
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 2.0833333333333333e-07,
14
+ "logits/chosen": 0.1770419478416443,
15
+ "logits/rejected": 0.2540443539619446,
16
+ "logps/chosen": -354.38037109375,
17
+ "logps/rejected": -305.27264404296875,
18
+ "loss": 0.4999,
19
+ "rewards/accuracies": 0.40625,
20
+ "rewards/chosen": 0.0009949840605258942,
21
+ "rewards/margins": 0.003730112686753273,
22
+ "rewards/rejected": -0.0027351281605660915,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.04,
27
  "learning_rate": 4.1666666666666667e-07,
28
+ "logits/chosen": 0.07181452214717865,
29
+ "logits/rejected": 0.19976207613945007,
30
+ "logps/chosen": -316.61358642578125,
31
+ "logps/rejected": -276.0943603515625,
32
+ "loss": 0.4986,
33
+ "rewards/accuracies": 0.512499988079071,
34
+ "rewards/chosen": 0.006085564382374287,
35
+ "rewards/margins": 0.009029300883412361,
36
+ "rewards/rejected": -0.002943736733868718,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.06,
41
  "learning_rate": 6.249999999999999e-07,
42
+ "logits/chosen": 0.18359068036079407,
43
+ "logits/rejected": 0.2548081874847412,
44
+ "logps/chosen": -294.26654052734375,
45
+ "logps/rejected": -298.5642395019531,
46
+ "loss": 0.4893,
47
+ "rewards/accuracies": 0.706250011920929,
48
+ "rewards/chosen": 0.03501707315444946,
49
+ "rewards/margins": 0.048330314457416534,
50
+ "rewards/rejected": -0.013313241302967072,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.08,
55
  "learning_rate": 8.333333333333333e-07,
56
+ "logits/chosen": 0.11794896423816681,
57
+ "logits/rejected": 0.23672719299793243,
58
+ "logps/chosen": -343.41192626953125,
59
+ "logps/rejected": -318.60638427734375,
60
+ "loss": 0.4731,
61
+ "rewards/accuracies": 0.612500011920929,
62
+ "rewards/chosen": 0.20801420509815216,
63
+ "rewards/margins": 0.1149774044752121,
64
+ "rewards/rejected": 0.09303676337003708,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 9.999463737538052e-07,
70
+ "logits/chosen": 0.18171748518943787,
71
+ "logits/rejected": 0.2686474025249481,
72
+ "logps/chosen": -305.8929443359375,
73
+ "logps/rejected": -285.5357360839844,
74
+ "loss": 0.4517,
75
+ "rewards/accuracies": 0.699999988079071,
76
+ "rewards/chosen": 0.2329844981431961,
77
+ "rewards/margins": 0.32565948367118835,
78
+ "rewards/rejected": -0.09267498552799225,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.13,
83
  "learning_rate": 9.980706626858607e-07,
84
+ "logits/chosen": 0.14295880496501923,
85
+ "logits/rejected": 0.28016844391822815,
86
+ "logps/chosen": -290.75335693359375,
87
+ "logps/rejected": -281.30303955078125,
88
+ "loss": 0.4337,
89
+ "rewards/accuracies": 0.6875,
90
+ "rewards/chosen": 0.10626886039972305,
91
+ "rewards/margins": 0.39915287494659424,
92
+ "rewards/rejected": -0.29288405179977417,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 9.935251313189563e-07,
98
+ "logits/chosen": 0.1319437325000763,
99
+ "logits/rejected": 0.21764138340950012,
100
+ "logps/chosen": -324.74078369140625,
101
+ "logps/rejected": -324.4031066894531,
102
+ "loss": 0.4212,
103
+ "rewards/accuracies": 0.606249988079071,
104
+ "rewards/chosen": 0.24914593994617462,
105
+ "rewards/margins": 0.5219111442565918,
106
+ "rewards/rejected": -0.27276521921157837,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 9.86334145175542e-07,
112
+ "logits/chosen": 0.17676237225532532,
113
+ "logits/rejected": 0.27126845717430115,
114
+ "logps/chosen": -311.08868408203125,
115
+ "logps/rejected": -297.3030090332031,
116
+ "loss": 0.4116,
117
  "rewards/accuracies": 0.75,
118
+ "rewards/chosen": 0.3787495195865631,
119
+ "rewards/margins": 0.8905243873596191,
120
+ "rewards/rejected": -0.5117748379707336,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.19,
125
  "learning_rate": 9.765362502737097e-07,
126
+ "logits/chosen": 0.03276940807700157,
127
+ "logits/rejected": 0.1763920933008194,
128
+ "logps/chosen": -327.9542541503906,
129
+ "logps/rejected": -275.962890625,
130
+ "loss": 0.401,
131
+ "rewards/accuracies": 0.699999988079071,
132
+ "rewards/chosen": 0.4493633210659027,
133
+ "rewards/margins": 0.8937808871269226,
134
+ "rewards/rejected": -0.4444176256656647,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.21,
139
  "learning_rate": 9.641839665080363e-07,
140
+ "logits/chosen": 0.10452715307474136,
141
+ "logits/rejected": 0.27761924266815186,
142
+ "logps/chosen": -323.75360107421875,
143
+ "logps/rejected": -299.73370361328125,
144
+ "loss": 0.3917,
145
+ "rewards/accuracies": 0.6312500238418579,
146
+ "rewards/chosen": 0.5403918027877808,
147
+ "rewards/margins": 0.907971978187561,
148
+ "rewards/rejected": -0.3675800561904907,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.23,
153
  "learning_rate": 9.493435061259129e-07,
154
+ "logits/chosen": 0.1223590150475502,
155
+ "logits/rejected": 0.14537492394447327,
156
+ "logps/chosen": -303.0465087890625,
157
+ "logps/rejected": -307.5492248535156,
158
+ "loss": 0.3772,
159
+ "rewards/accuracies": 0.6875,
160
+ "rewards/chosen": 0.4786440432071686,
161
+ "rewards/margins": 0.8835989236831665,
162
+ "rewards/rejected": -0.4049549102783203,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.25,
167
  "learning_rate": 9.320944188084241e-07,
168
+ "logits/chosen": 0.07157851755619049,
169
+ "logits/rejected": 0.12891840934753418,
170
+ "logps/chosen": -309.7882385253906,
171
+ "logps/rejected": -329.6763610839844,
172
+ "loss": 0.3939,
173
+ "rewards/accuracies": 0.6312500238418579,
174
+ "rewards/chosen": 0.538270115852356,
175
+ "rewards/margins": 0.9622691869735718,
176
+ "rewards/rejected": -0.4239990711212158,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.27,
181
  "learning_rate": 9.125291652582547e-07,
182
+ "logits/chosen": 0.03255900740623474,
183
+ "logits/rejected": 0.1836429387331009,
184
+ "logps/chosen": -328.1858825683594,
185
+ "logps/rejected": -294.7179260253906,
186
+ "loss": 0.3745,
187
+ "rewards/accuracies": 0.6625000238418579,
188
+ "rewards/chosen": 0.5155087113380432,
189
+ "rewards/margins": 1.008885145187378,
190
+ "rewards/rejected": -0.4933764338493347,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.29,
195
  "learning_rate": 8.90752621580335e-07,
196
+ "logits/chosen": 0.008989883586764336,
197
+ "logits/rejected": 0.17159470915794373,
198
+ "logps/chosen": -326.74957275390625,
199
+ "logps/rejected": -284.432373046875,
200
+ "loss": 0.3671,
201
+ "rewards/accuracies": 0.6812499761581421,
202
+ "rewards/chosen": 0.2646927833557129,
203
+ "rewards/margins": 0.9909642338752747,
204
+ "rewards/rejected": -0.7262714505195618,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.31,
209
  "learning_rate": 8.668815171119019e-07,
210
+ "logits/chosen": 0.10270164906978607,
211
+ "logits/rejected": 0.15117475390434265,
212
+ "logps/chosen": -319.47796630859375,
213
+ "logps/rejected": -322.017822265625,
214
+ "loss": 0.3768,
215
+ "rewards/accuracies": 0.6875,
216
+ "rewards/chosen": 0.7911346554756165,
217
+ "rewards/margins": 1.1445444822311401,
218
+ "rewards/rejected": -0.35340994596481323,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.33,
223
  "learning_rate": 8.410438087153911e-07,
224
+ "logits/chosen": 0.07868606597185135,
225
+ "logits/rejected": 0.12421569973230362,
226
+ "logps/chosen": -283.022216796875,
227
+ "logps/rejected": -284.0340881347656,
228
+ "loss": 0.3737,
229
+ "rewards/accuracies": 0.7437499761581421,
230
+ "rewards/chosen": 0.9519669413566589,
231
+ "rewards/margins": 1.1187279224395752,
232
+ "rewards/rejected": -0.16676095128059387,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.36,
237
  "learning_rate": 8.133779948881513e-07,
238
+ "logits/chosen": 0.053541384637355804,
239
+ "logits/rejected": 0.13824662566184998,
240
+ "logps/chosen": -318.894775390625,
241
+ "logps/rejected": -313.05755615234375,
242
+ "loss": 0.3657,
243
+ "rewards/accuracies": 0.706250011920929,
244
+ "rewards/chosen": 0.8736802339553833,
245
+ "rewards/margins": 1.134092926979065,
246
+ "rewards/rejected": -0.26041263341903687,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.38,
251
  "learning_rate": 7.840323733655778e-07,
252
+ "logits/chosen": 0.01729046180844307,
253
+ "logits/rejected": 0.1666645109653473,
254
+ "logps/chosen": -309.3392639160156,
255
+ "logps/rejected": -307.69378662109375,
256
+ "loss": 0.3669,
257
+ "rewards/accuracies": 0.7250000238418579,
258
+ "rewards/chosen": 0.8346372842788696,
259
+ "rewards/margins": 1.2255662679672241,
260
+ "rewards/rejected": -0.39092904329299927,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.4,
265
  "learning_rate": 7.531642461971514e-07,
266
+ "logits/chosen": 0.07640022039413452,
267
+ "logits/rejected": 0.1669580638408661,
268
+ "logps/chosen": -320.5805358886719,
269
+ "logps/rejected": -321.72894287109375,
270
+ "loss": 0.3595,
271
+ "rewards/accuracies": 0.7124999761581421,
272
+ "rewards/chosen": 0.7039467692375183,
273
+ "rewards/margins": 1.15414297580719,
274
+ "rewards/rejected": -0.4501960873603821,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.42,
279
  "learning_rate": 7.209390765564318e-07,
280
+ "logits/chosen": 0.03291007876396179,
281
+ "logits/rejected": 0.178420752286911,
282
+ "logps/chosen": -325.4523010253906,
283
+ "logps/rejected": -307.71624755859375,
284
+ "loss": 0.3669,
285
+ "rewards/accuracies": 0.7250000238418579,
286
+ "rewards/chosen": 0.7518741488456726,
287
+ "rewards/margins": 1.431354284286499,
288
+ "rewards/rejected": -0.679480254650116,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.44,
293
  "learning_rate": 6.875296018047809e-07,
294
+ "logits/chosen": 0.16855312883853912,
295
+ "logits/rejected": 0.3136471211910248,
296
+ "logps/chosen": -296.46148681640625,
297
+ "logps/rejected": -285.39666748046875,
298
+ "loss": 0.3694,
299
+ "rewards/accuracies": 0.768750011920929,
300
+ "rewards/chosen": 0.8847891092300415,
301
+ "rewards/margins": 1.6609344482421875,
302
+ "rewards/rejected": -0.7761452794075012,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.46,
307
  "learning_rate": 6.531149075630796e-07,
308
+ "logits/chosen": 0.03008892573416233,
309
+ "logits/rejected": 0.1899140179157257,
310
+ "logps/chosen": -303.166015625,
311
+ "logps/rejected": -271.8243408203125,
312
+ "loss": 0.3701,
313
  "rewards/accuracies": 0.6937500238418579,
314
+ "rewards/chosen": 0.6756522059440613,
315
+ "rewards/margins": 1.0833475589752197,
316
+ "rewards/rejected": -0.4076954424381256,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.48,
321
  "learning_rate": 6.178794677547137e-07,
322
+ "logits/chosen": 0.1291465163230896,
323
+ "logits/rejected": 0.1802050620317459,
324
+ "logps/chosen": -310.93695068359375,
325
+ "logps/rejected": -295.5335998535156,
326
+ "loss": 0.3611,
327
+ "rewards/accuracies": 0.675000011920929,
328
+ "rewards/chosen": 0.7562187314033508,
329
+ "rewards/margins": 1.3768624067306519,
330
+ "rewards/rejected": -0.6206437945365906,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.5,
335
  "learning_rate": 5.820121557655108e-07,
336
+ "logits/chosen": 0.07808051258325577,
337
+ "logits/rejected": 0.12852515280246735,
338
+ "logps/chosen": -298.18524169921875,
339
+ "logps/rejected": -307.0373229980469,
340
+ "loss": 0.3695,
341
+ "rewards/accuracies": 0.643750011920929,
342
+ "rewards/chosen": 0.6883242130279541,
343
+ "rewards/margins": 0.9448167681694031,
344
+ "rewards/rejected": -0.2564924955368042,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.52,
349
  "learning_rate": 5.457052320211339e-07,
350
+ "logits/chosen": 0.028507575392723083,
351
+ "logits/rejected": 0.13449445366859436,
352
+ "logps/chosen": -294.77081298828125,
353
+ "logps/rejected": -290.4283752441406,
354
+ "loss": 0.363,
355
+ "rewards/accuracies": 0.706250011920929,
356
+ "rewards/chosen": 0.863865077495575,
357
+ "rewards/margins": 1.283084750175476,
358
+ "rewards/rejected": -0.4192196726799011,
359
  "step": 250
360
  },
361
  {
362
  "epoch": 0.54,
363
  "learning_rate": 5.091533134088387e-07,
364
+ "logits/chosen": -0.012728470377624035,
365
+ "logits/rejected": 0.08861465752124786,
366
+ "logps/chosen": -338.91156005859375,
367
+ "logps/rejected": -306.783935546875,
368
+ "loss": 0.3741,
369
+ "rewards/accuracies": 0.625,
370
+ "rewards/chosen": 0.78661048412323,
371
+ "rewards/margins": 1.1226966381072998,
372
+ "rewards/rejected": -0.3360862135887146,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 0.57,
377
  "learning_rate": 4.7255233006783624e-07,
378
+ "logits/chosen": 0.08402873575687408,
379
+ "logits/rejected": 0.13576345145702362,
380
+ "logps/chosen": -310.37738037109375,
381
+ "logps/rejected": -325.88531494140625,
382
+ "loss": 0.3558,
383
+ "rewards/accuracies": 0.7562500238418579,
384
+ "rewards/chosen": 0.9378229975700378,
385
+ "rewards/margins": 1.7096540927886963,
386
+ "rewards/rejected": -0.7718309164047241,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 0.59,
391
  "learning_rate": 4.3609847514019763e-07,
392
+ "logits/chosen": 0.06844338774681091,
393
+ "logits/rejected": 0.0715598464012146,
394
+ "logps/chosen": -280.6861877441406,
395
+ "logps/rejected": -300.75091552734375,
396
+ "loss": 0.3635,
397
+ "rewards/accuracies": 0.6875,
398
+ "rewards/chosen": 0.7898384928703308,
399
+ "rewards/margins": 1.2740315198898315,
400
+ "rewards/rejected": -0.4841931462287903,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 0.61,
405
  "learning_rate": 3.9998715311197783e-07,
406
+ "logits/chosen": 0.03731096163392067,
407
+ "logits/rejected": 0.12578508257865906,
408
+ "logps/chosen": -331.48779296875,
409
+ "logps/rejected": -339.0268859863281,
410
+ "loss": 0.356,
411
  "rewards/accuracies": 0.737500011920929,
412
+ "rewards/chosen": 0.7471667528152466,
413
+ "rewards/margins": 1.5588438510894775,
414
+ "rewards/rejected": -0.811677098274231,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 0.63,
419
  "learning_rate": 3.6441193238179146e-07,
420
+ "logits/chosen": 0.10170190036296844,
421
+ "logits/rejected": 0.125459223985672,
422
+ "logps/chosen": -276.04132080078125,
423
+ "logps/rejected": -338.2930603027344,
424
+ "loss": 0.3543,
425
+ "rewards/accuracies": 0.737500011920929,
426
+ "rewards/chosen": 0.7856809496879578,
427
+ "rewards/margins": 1.5332136154174805,
428
+ "rewards/rejected": -0.7475326061248779,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 0.65,
433
  "learning_rate": 3.295635076714144e-07,
434
+ "logits/chosen": 0.10013142973184586,
435
+ "logits/rejected": 0.12764233350753784,
436
+ "logps/chosen": -281.08636474609375,
437
+ "logps/rejected": -327.638916015625,
438
+ "loss": 0.344,
439
+ "rewards/accuracies": 0.706250011920929,
440
+ "rewards/chosen": 0.7322741746902466,
441
+ "rewards/margins": 1.5288127660751343,
442
+ "rewards/rejected": -0.7965387105941772,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 0.67,
447
  "learning_rate": 2.956286778402226e-07,
448
+ "logits/chosen": 0.04914706200361252,
449
+ "logits/rejected": 0.19642756879329681,
450
+ "logps/chosen": -297.6860046386719,
451
+ "logps/rejected": -307.39886474609375,
452
+ "loss": 0.3589,
453
  "rewards/accuracies": 0.7250000238418579,
454
+ "rewards/chosen": 0.8972498774528503,
455
+ "rewards/margins": 1.7571271657943726,
456
+ "rewards/rejected": -0.8598772883415222,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 0.69,
461
  "learning_rate": 2.6278934458271996e-07,
462
+ "logits/chosen": 0.10303878784179688,
463
+ "logits/rejected": 0.18232768774032593,
464
+ "logps/chosen": -279.687744140625,
465
+ "logps/rejected": -322.4974060058594,
466
+ "loss": 0.3554,
467
+ "rewards/accuracies": 0.675000011920929,
468
+ "rewards/chosen": 0.9540345072746277,
469
+ "rewards/margins": 1.5413516759872437,
470
+ "rewards/rejected": -0.5873170495033264,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 0.71,
475
  "learning_rate": 2.312215373764551e-07,
476
+ "logits/chosen": 0.05921119451522827,
477
+ "logits/rejected": 0.1563117355108261,
478
+ "logps/chosen": -270.2222595214844,
479
+ "logps/rejected": -273.39544677734375,
480
+ "loss": 0.3482,
481
+ "rewards/accuracies": 0.7749999761581421,
482
+ "rewards/chosen": 0.8237099647521973,
483
+ "rewards/margins": 1.4518331289291382,
484
+ "rewards/rejected": -0.6281229853630066,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 0.73,
489
  "learning_rate": 2.0109446990692963e-07,
490
+ "logits/chosen": 0.026022329926490784,
491
+ "logits/rejected": 0.03612793609499931,
492
+ "logps/chosen": -287.3631286621094,
493
+ "logps/rejected": -323.0098876953125,
494
+ "loss": 0.355,
495
+ "rewards/accuracies": 0.643750011920929,
496
+ "rewards/chosen": 0.7069499492645264,
497
+ "rewards/margins": 1.3287115097045898,
498
+ "rewards/rejected": -0.6217616200447083,
499
  "step": 350
500
  },
501
  {
502
  "epoch": 0.75,
503
  "learning_rate": 1.725696330273575e-07,
504
+ "logits/chosen": -0.008826015517115593,
505
+ "logits/rejected": 0.11012457311153412,
506
+ "logps/chosen": -308.5582580566406,
507
+ "logps/rejected": -291.039794921875,
508
+ "loss": 0.3408,
509
+ "rewards/accuracies": 0.737500011920929,
510
+ "rewards/chosen": 0.9799526929855347,
511
+ "rewards/margins": 1.4228074550628662,
512
+ "rewards/rejected": -0.44285479187965393,
513
  "step": 360
514
  },
515
  {
516
  "epoch": 0.77,
517
  "learning_rate": 1.4579992911531496e-07,
518
+ "logits/chosen": 0.011949884705245495,
519
+ "logits/rejected": 0.10477302223443985,
520
+ "logps/chosen": -325.74664306640625,
521
+ "logps/rejected": -332.83526611328125,
522
+ "loss": 0.3372,
523
+ "rewards/accuracies": 0.7437499761581421,
524
+ "rewards/chosen": 1.134242057800293,
525
+ "rewards/margins": 2.2450954914093018,
526
+ "rewards/rejected": -1.1108531951904297,
527
  "step": 370
528
  },
529
  {
530
  "epoch": 0.8,
531
  "learning_rate": 1.209288524664029e-07,
532
+ "logits/chosen": 0.06705882400274277,
533
+ "logits/rejected": 0.14220719039440155,
534
+ "logps/chosen": -278.93212890625,
535
+ "logps/rejected": -287.16900634765625,
536
+ "loss": 0.3393,
537
+ "rewards/accuracies": 0.6625000238418579,
538
+ "rewards/chosen": 0.707054853439331,
539
+ "rewards/margins": 1.4562907218933105,
540
+ "rewards/rejected": -0.749235987663269,
541
  "step": 380
542
  },
543
  {
544
  "epoch": 0.82,
545
  "learning_rate": 9.808972011828054e-08,
546
+ "logits/chosen": -0.041753821074962616,
547
+ "logits/rejected": 0.10108913481235504,
548
+ "logps/chosen": -319.83123779296875,
549
+ "logps/rejected": -312.74029541015625,
550
+ "loss": 0.3434,
551
+ "rewards/accuracies": 0.731249988079071,
552
+ "rewards/chosen": 1.0930083990097046,
553
+ "rewards/margins": 1.6807419061660767,
554
+ "rewards/rejected": -0.5877334475517273,
555
  "step": 390
556
  },
557
  {
558
  "epoch": 0.84,
559
  "learning_rate": 7.740495722810269e-08,
560
+ "logits/chosen": 0.037683337926864624,
561
+ "logits/rejected": 0.1319020837545395,
562
+ "logps/chosen": -298.04736328125,
563
+ "logps/rejected": -307.0213317871094,
564
+ "loss": 0.3519,
565
+ "rewards/accuracies": 0.75,
566
+ "rewards/chosen": 0.9478852152824402,
567
+ "rewards/margins": 1.5595645904541016,
568
+ "rewards/rejected": -0.6116792559623718,
569
  "step": 400
570
  },
571
  {
572
  "epoch": 0.86,
573
  "learning_rate": 5.898544083397e-08,
574
+ "logits/chosen": -0.02371780201792717,
575
+ "logits/rejected": 0.03532714769244194,
576
+ "logps/chosen": -273.40704345703125,
577
+ "logps/rejected": -299.4648132324219,
578
+ "loss": 0.3533,
579
+ "rewards/accuracies": 0.625,
580
+ "rewards/chosen": 0.789527416229248,
581
+ "rewards/margins": 1.2326061725616455,
582
+ "rewards/rejected": -0.44307881593704224,
583
  "step": 410
584
  },
585
  {
586
  "epoch": 0.88,
587
  "learning_rate": 4.292990551804171e-08,
588
+ "logits/chosen": 0.07686875015497208,
589
+ "logits/rejected": 0.1795254498720169,
590
+ "logps/chosen": -273.1594543457031,
591
+ "logps/rejected": -296.47528076171875,
592
+ "loss": 0.3382,
593
+ "rewards/accuracies": 0.762499988079071,
594
+ "rewards/chosen": 1.008718729019165,
595
+ "rewards/margins": 1.5497512817382812,
596
+ "rewards/rejected": -0.5410324335098267,
597
  "step": 420
598
  },
599
  {
600
  "epoch": 0.9,
601
  "learning_rate": 2.9324414157151367e-08,
602
+ "logits/chosen": 0.06547899544239044,
603
+ "logits/rejected": 0.09760904312133789,
604
+ "logps/chosen": -315.00506591796875,
605
+ "logps/rejected": -338.0772399902344,
606
+ "loss": 0.3499,
607
+ "rewards/accuracies": 0.7124999761581421,
608
+ "rewards/chosen": 0.9535905122756958,
609
+ "rewards/margins": 1.6683721542358398,
610
+ "rewards/rejected": -0.7147817015647888,
611
  "step": 430
612
  },
613
  {
614
  "epoch": 0.92,
615
  "learning_rate": 1.824189659787284e-08,
616
+ "logits/chosen": 0.051959145814180374,
617
+ "logits/rejected": 0.0797661691904068,
618
+ "logps/chosen": -269.6096496582031,
619
+ "logps/rejected": -288.70709228515625,
620
+ "loss": 0.3456,
621
+ "rewards/accuracies": 0.6499999761581421,
622
+ "rewards/chosen": 0.7473762035369873,
623
+ "rewards/margins": 1.2938742637634277,
624
+ "rewards/rejected": -0.5464980006217957,
625
  "step": 440
626
  },
627
  {
628
  "epoch": 0.94,
629
  "learning_rate": 9.741758728888217e-09,
630
+ "logits/chosen": 0.013958173803985119,
631
+ "logits/rejected": 0.19162164628505707,
632
+ "logps/chosen": -331.8248596191406,
633
+ "logps/rejected": -298.86553955078125,
634
+ "loss": 0.3412,
635
+ "rewards/accuracies": 0.71875,
636
+ "rewards/chosen": 0.9063283801078796,
637
+ "rewards/margins": 1.6520798206329346,
638
+ "rewards/rejected": -0.7457513809204102,
639
  "step": 450
640
  },
641
  {
642
  "epoch": 0.96,
643
  "learning_rate": 3.869564046156459e-09,
644
+ "logits/chosen": 0.05520665645599365,
645
+ "logits/rejected": 0.24095895886421204,
646
+ "logps/chosen": -279.37713623046875,
647
+ "logps/rejected": -279.66351318359375,
648
+ "loss": 0.3525,
649
+ "rewards/accuracies": 0.768750011920929,
650
+ "rewards/chosen": 0.9280216097831726,
651
+ "rewards/margins": 1.6233961582183838,
652
+ "rewards/rejected": -0.6953743100166321,
653
  "step": 460
654
  },
655
  {
656
  "epoch": 0.98,
657
  "learning_rate": 6.567894177967325e-10,
658
+ "logits/chosen": 0.008933846838772297,
659
+ "logits/rejected": 0.1640961915254593,
660
+ "logps/chosen": -319.6179504394531,
661
+ "logps/rejected": -314.25787353515625,
662
+ "loss": 0.3473,
663
+ "rewards/accuracies": 0.7437499761581421,
664
+ "rewards/chosen": 0.9765602946281433,
665
+ "rewards/margins": 1.8153190612792969,
666
+ "rewards/rejected": -0.8387589454650879,
667
  "step": 470
668
  },
669
  {
670
  "epoch": 1.0,
671
  "step": 477,
672
  "total_flos": 0.0,
673
+ "train_loss": 0.37614710375947774,
674
+ "train_runtime": 6361.5592,
675
+ "train_samples_per_second": 9.61,
676
+ "train_steps_per_second": 0.075
677
  }
678
  ],
679
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5489a9492470a4cd73257e10cf655deb8e3a29aaf1d9767180b56bdf4b151b30
3
  size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec4624b9c095040eb8aa52a9ba592de199b303541db109644b3cf58d7c369c4
3
  size 6648