wzhouad commited on
Commit
8c87b08
1 Parent(s): cf5bade

Model save

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.37614710375947774,
4
- "train_runtime": 6361.5592,
5
  "train_samples": 61134,
6
- "train_samples_per_second": 9.61,
7
  "train_steps_per_second": 0.075
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.42718374404267445,
4
+ "train_runtime": 6325.1171,
5
  "train_samples": 61134,
6
+ "train_samples_per_second": 9.665,
7
  "train_steps_per_second": 0.075
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8558783426207d481e1b212901b2a745cf326f53087e609ed23dbb2013f59ce2
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c12c9c343db96ae94ef515af0f5da81ed9a6301101432050dd413b94822f22e1
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0de16d834b47e78126680587374bfb8e3f6fff6da5595e08d8bc8c4dfe3f057e
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfe6406bd258e8fdc18e1f4c122bbb1cfc0b2cd66831b06fda75bf88f2955d87
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd358506aa4a2ba7d2848486443d68ed78736482c3641750882e51a5423e6c7e
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a9ea4dc30b2c98ff046a72449873d2850cf575c5ab3894159bbe51207b9cdf1
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ee534a0b473037a903d7605adf23f1bc01159bba79b0d8f59245eebbe979737
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e249a9ba96464ca0f1329eea1893d5fff0653b7a9dffc6f54ac8c684320502c
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.37614710375947774,
4
- "train_runtime": 6361.5592,
5
  "train_samples": 61134,
6
- "train_samples_per_second": 9.61,
7
  "train_steps_per_second": 0.075
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.42718374404267445,
4
+ "train_runtime": 6325.1171,
5
  "train_samples": 61134,
6
+ "train_samples_per_second": 9.665,
7
  "train_steps_per_second": 0.075
8
  }
trainer_state.json CHANGED
@@ -11,668 +11,668 @@
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 2.0833333333333333e-07,
14
- "logits/chosen": 0.1770419478416443,
15
- "logits/rejected": 0.2540443539619446,
16
- "logps/chosen": -354.38037109375,
17
- "logps/rejected": -305.27264404296875,
18
- "loss": 0.4999,
19
- "rewards/accuracies": 0.40625,
20
- "rewards/chosen": 0.0009949840605258942,
21
- "rewards/margins": 0.003730112686753273,
22
- "rewards/rejected": -0.0027351281605660915,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.04,
27
  "learning_rate": 4.1666666666666667e-07,
28
- "logits/chosen": 0.07181452214717865,
29
- "logits/rejected": 0.19976207613945007,
30
- "logps/chosen": -316.61358642578125,
31
- "logps/rejected": -276.0943603515625,
32
- "loss": 0.4986,
33
- "rewards/accuracies": 0.512499988079071,
34
- "rewards/chosen": 0.006085564382374287,
35
- "rewards/margins": 0.009029300883412361,
36
- "rewards/rejected": -0.002943736733868718,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.06,
41
  "learning_rate": 6.249999999999999e-07,
42
- "logits/chosen": 0.18359068036079407,
43
- "logits/rejected": 0.2548081874847412,
44
- "logps/chosen": -294.26654052734375,
45
- "logps/rejected": -298.5642395019531,
46
- "loss": 0.4893,
47
- "rewards/accuracies": 0.706250011920929,
48
- "rewards/chosen": 0.03501707315444946,
49
- "rewards/margins": 0.048330314457416534,
50
- "rewards/rejected": -0.013313241302967072,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.08,
55
  "learning_rate": 8.333333333333333e-07,
56
- "logits/chosen": 0.11794896423816681,
57
- "logits/rejected": 0.23672719299793243,
58
- "logps/chosen": -343.41192626953125,
59
- "logps/rejected": -318.60638427734375,
60
- "loss": 0.4731,
61
- "rewards/accuracies": 0.612500011920929,
62
- "rewards/chosen": 0.20801420509815216,
63
- "rewards/margins": 0.1149774044752121,
64
- "rewards/rejected": 0.09303676337003708,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 9.999463737538052e-07,
70
- "logits/chosen": 0.18171748518943787,
71
- "logits/rejected": 0.2686474025249481,
72
- "logps/chosen": -305.8929443359375,
73
- "logps/rejected": -285.5357360839844,
74
- "loss": 0.4517,
75
  "rewards/accuracies": 0.699999988079071,
76
- "rewards/chosen": 0.2329844981431961,
77
- "rewards/margins": 0.32565948367118835,
78
- "rewards/rejected": -0.09267498552799225,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.13,
83
  "learning_rate": 9.980706626858607e-07,
84
- "logits/chosen": 0.14295880496501923,
85
- "logits/rejected": 0.28016844391822815,
86
- "logps/chosen": -290.75335693359375,
87
- "logps/rejected": -281.30303955078125,
88
- "loss": 0.4337,
89
- "rewards/accuracies": 0.6875,
90
- "rewards/chosen": 0.10626886039972305,
91
- "rewards/margins": 0.39915287494659424,
92
- "rewards/rejected": -0.29288405179977417,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 9.935251313189563e-07,
98
- "logits/chosen": 0.1319437325000763,
99
- "logits/rejected": 0.21764138340950012,
100
- "logps/chosen": -324.74078369140625,
101
- "logps/rejected": -324.4031066894531,
102
- "loss": 0.4212,
103
- "rewards/accuracies": 0.606249988079071,
104
- "rewards/chosen": 0.24914593994617462,
105
- "rewards/margins": 0.5219111442565918,
106
- "rewards/rejected": -0.27276521921157837,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 9.86334145175542e-07,
112
- "logits/chosen": 0.17676237225532532,
113
- "logits/rejected": 0.27126845717430115,
114
- "logps/chosen": -311.08868408203125,
115
- "logps/rejected": -297.3030090332031,
116
- "loss": 0.4116,
117
- "rewards/accuracies": 0.75,
118
- "rewards/chosen": 0.3787495195865631,
119
- "rewards/margins": 0.8905243873596191,
120
- "rewards/rejected": -0.5117748379707336,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.19,
125
  "learning_rate": 9.765362502737097e-07,
126
- "logits/chosen": 0.03276940807700157,
127
- "logits/rejected": 0.1763920933008194,
128
- "logps/chosen": -327.9542541503906,
129
- "logps/rejected": -275.962890625,
130
- "loss": 0.401,
131
- "rewards/accuracies": 0.699999988079071,
132
- "rewards/chosen": 0.4493633210659027,
133
- "rewards/margins": 0.8937808871269226,
134
- "rewards/rejected": -0.4444176256656647,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.21,
139
  "learning_rate": 9.641839665080363e-07,
140
- "logits/chosen": 0.10452715307474136,
141
- "logits/rejected": 0.27761924266815186,
142
- "logps/chosen": -323.75360107421875,
143
- "logps/rejected": -299.73370361328125,
144
- "loss": 0.3917,
145
  "rewards/accuracies": 0.6312500238418579,
146
- "rewards/chosen": 0.5403918027877808,
147
- "rewards/margins": 0.907971978187561,
148
- "rewards/rejected": -0.3675800561904907,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.23,
153
  "learning_rate": 9.493435061259129e-07,
154
- "logits/chosen": 0.1223590150475502,
155
- "logits/rejected": 0.14537492394447327,
156
- "logps/chosen": -303.0465087890625,
157
- "logps/rejected": -307.5492248535156,
158
- "loss": 0.3772,
159
- "rewards/accuracies": 0.6875,
160
- "rewards/chosen": 0.4786440432071686,
161
- "rewards/margins": 0.8835989236831665,
162
- "rewards/rejected": -0.4049549102783203,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.25,
167
  "learning_rate": 9.320944188084241e-07,
168
- "logits/chosen": 0.07157851755619049,
169
- "logits/rejected": 0.12891840934753418,
170
- "logps/chosen": -309.7882385253906,
171
- "logps/rejected": -329.6763610839844,
172
- "loss": 0.3939,
173
- "rewards/accuracies": 0.6312500238418579,
174
- "rewards/chosen": 0.538270115852356,
175
- "rewards/margins": 0.9622691869735718,
176
- "rewards/rejected": -0.4239990711212158,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.27,
181
  "learning_rate": 9.125291652582547e-07,
182
- "logits/chosen": 0.03255900740623474,
183
- "logits/rejected": 0.1836429387331009,
184
- "logps/chosen": -328.1858825683594,
185
- "logps/rejected": -294.7179260253906,
186
- "loss": 0.3745,
187
- "rewards/accuracies": 0.6625000238418579,
188
- "rewards/chosen": 0.5155087113380432,
189
- "rewards/margins": 1.008885145187378,
190
- "rewards/rejected": -0.4933764338493347,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.29,
195
  "learning_rate": 8.90752621580335e-07,
196
- "logits/chosen": 0.008989883586764336,
197
- "logits/rejected": 0.17159470915794373,
198
- "logps/chosen": -326.74957275390625,
199
- "logps/rejected": -284.432373046875,
200
- "loss": 0.3671,
201
- "rewards/accuracies": 0.6812499761581421,
202
- "rewards/chosen": 0.2646927833557129,
203
- "rewards/margins": 0.9909642338752747,
204
- "rewards/rejected": -0.7262714505195618,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.31,
209
  "learning_rate": 8.668815171119019e-07,
210
- "logits/chosen": 0.10270164906978607,
211
- "logits/rejected": 0.15117475390434265,
212
- "logps/chosen": -319.47796630859375,
213
- "logps/rejected": -322.017822265625,
214
- "loss": 0.3768,
215
- "rewards/accuracies": 0.6875,
216
- "rewards/chosen": 0.7911346554756165,
217
- "rewards/margins": 1.1445444822311401,
218
- "rewards/rejected": -0.35340994596481323,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.33,
223
  "learning_rate": 8.410438087153911e-07,
224
- "logits/chosen": 0.07868606597185135,
225
- "logits/rejected": 0.12421569973230362,
226
- "logps/chosen": -283.022216796875,
227
- "logps/rejected": -284.0340881347656,
228
- "loss": 0.3737,
229
- "rewards/accuracies": 0.7437499761581421,
230
- "rewards/chosen": 0.9519669413566589,
231
- "rewards/margins": 1.1187279224395752,
232
- "rewards/rejected": -0.16676095128059387,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.36,
237
  "learning_rate": 8.133779948881513e-07,
238
- "logits/chosen": 0.053541384637355804,
239
- "logits/rejected": 0.13824662566184998,
240
- "logps/chosen": -318.894775390625,
241
- "logps/rejected": -313.05755615234375,
242
- "loss": 0.3657,
243
  "rewards/accuracies": 0.706250011920929,
244
- "rewards/chosen": 0.8736802339553833,
245
- "rewards/margins": 1.134092926979065,
246
- "rewards/rejected": -0.26041263341903687,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.38,
251
  "learning_rate": 7.840323733655778e-07,
252
- "logits/chosen": 0.01729046180844307,
253
- "logits/rejected": 0.1666645109653473,
254
- "logps/chosen": -309.3392639160156,
255
- "logps/rejected": -307.69378662109375,
256
- "loss": 0.3669,
257
- "rewards/accuracies": 0.7250000238418579,
258
- "rewards/chosen": 0.8346372842788696,
259
- "rewards/margins": 1.2255662679672241,
260
- "rewards/rejected": -0.39092904329299927,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.4,
265
  "learning_rate": 7.531642461971514e-07,
266
- "logits/chosen": 0.07640022039413452,
267
- "logits/rejected": 0.1669580638408661,
268
- "logps/chosen": -320.5805358886719,
269
- "logps/rejected": -321.72894287109375,
270
- "loss": 0.3595,
271
- "rewards/accuracies": 0.7124999761581421,
272
- "rewards/chosen": 0.7039467692375183,
273
- "rewards/margins": 1.15414297580719,
274
- "rewards/rejected": -0.4501960873603821,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.42,
279
  "learning_rate": 7.209390765564318e-07,
280
- "logits/chosen": 0.03291007876396179,
281
- "logits/rejected": 0.178420752286911,
282
- "logps/chosen": -325.4523010253906,
283
- "logps/rejected": -307.71624755859375,
284
- "loss": 0.3669,
285
- "rewards/accuracies": 0.7250000238418579,
286
- "rewards/chosen": 0.7518741488456726,
287
- "rewards/margins": 1.431354284286499,
288
- "rewards/rejected": -0.679480254650116,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.44,
293
  "learning_rate": 6.875296018047809e-07,
294
- "logits/chosen": 0.16855312883853912,
295
- "logits/rejected": 0.3136471211910248,
296
- "logps/chosen": -296.46148681640625,
297
- "logps/rejected": -285.39666748046875,
298
- "loss": 0.3694,
299
- "rewards/accuracies": 0.768750011920929,
300
- "rewards/chosen": 0.8847891092300415,
301
- "rewards/margins": 1.6609344482421875,
302
- "rewards/rejected": -0.7761452794075012,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.46,
307
  "learning_rate": 6.531149075630796e-07,
308
- "logits/chosen": 0.03008892573416233,
309
- "logits/rejected": 0.1899140179157257,
310
- "logps/chosen": -303.166015625,
311
- "logps/rejected": -271.8243408203125,
312
- "loss": 0.3701,
313
- "rewards/accuracies": 0.6937500238418579,
314
- "rewards/chosen": 0.6756522059440613,
315
- "rewards/margins": 1.0833475589752197,
316
- "rewards/rejected": -0.4076954424381256,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.48,
321
  "learning_rate": 6.178794677547137e-07,
322
- "logits/chosen": 0.1291465163230896,
323
- "logits/rejected": 0.1802050620317459,
324
- "logps/chosen": -310.93695068359375,
325
- "logps/rejected": -295.5335998535156,
326
- "loss": 0.3611,
327
- "rewards/accuracies": 0.675000011920929,
328
- "rewards/chosen": 0.7562187314033508,
329
- "rewards/margins": 1.3768624067306519,
330
- "rewards/rejected": -0.6206437945365906,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.5,
335
  "learning_rate": 5.820121557655108e-07,
336
- "logits/chosen": 0.07808051258325577,
337
- "logits/rejected": 0.12852515280246735,
338
- "logps/chosen": -298.18524169921875,
339
- "logps/rejected": -307.0373229980469,
340
- "loss": 0.3695,
341
- "rewards/accuracies": 0.643750011920929,
342
- "rewards/chosen": 0.6883242130279541,
343
- "rewards/margins": 0.9448167681694031,
344
- "rewards/rejected": -0.2564924955368042,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.52,
349
  "learning_rate": 5.457052320211339e-07,
350
- "logits/chosen": 0.028507575392723083,
351
- "logits/rejected": 0.13449445366859436,
352
- "logps/chosen": -294.77081298828125,
353
- "logps/rejected": -290.4283752441406,
354
- "loss": 0.363,
355
- "rewards/accuracies": 0.706250011920929,
356
- "rewards/chosen": 0.863865077495575,
357
- "rewards/margins": 1.283084750175476,
358
- "rewards/rejected": -0.4192196726799011,
359
  "step": 250
360
  },
361
  {
362
  "epoch": 0.54,
363
  "learning_rate": 5.091533134088387e-07,
364
- "logits/chosen": -0.012728470377624035,
365
- "logits/rejected": 0.08861465752124786,
366
- "logps/chosen": -338.91156005859375,
367
- "logps/rejected": -306.783935546875,
368
- "loss": 0.3741,
369
- "rewards/accuracies": 0.625,
370
- "rewards/chosen": 0.78661048412323,
371
- "rewards/margins": 1.1226966381072998,
372
- "rewards/rejected": -0.3360862135887146,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 0.57,
377
  "learning_rate": 4.7255233006783624e-07,
378
- "logits/chosen": 0.08402873575687408,
379
- "logits/rejected": 0.13576345145702362,
380
- "logps/chosen": -310.37738037109375,
381
- "logps/rejected": -325.88531494140625,
382
- "loss": 0.3558,
383
  "rewards/accuracies": 0.7562500238418579,
384
- "rewards/chosen": 0.9378229975700378,
385
- "rewards/margins": 1.7096540927886963,
386
- "rewards/rejected": -0.7718309164047241,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 0.59,
391
  "learning_rate": 4.3609847514019763e-07,
392
- "logits/chosen": 0.06844338774681091,
393
- "logits/rejected": 0.0715598464012146,
394
- "logps/chosen": -280.6861877441406,
395
- "logps/rejected": -300.75091552734375,
396
- "loss": 0.3635,
397
- "rewards/accuracies": 0.6875,
398
- "rewards/chosen": 0.7898384928703308,
399
- "rewards/margins": 1.2740315198898315,
400
- "rewards/rejected": -0.4841931462287903,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 0.61,
405
  "learning_rate": 3.9998715311197783e-07,
406
- "logits/chosen": 0.03731096163392067,
407
- "logits/rejected": 0.12578508257865906,
408
- "logps/chosen": -331.48779296875,
409
- "logps/rejected": -339.0268859863281,
410
- "loss": 0.356,
411
- "rewards/accuracies": 0.737500011920929,
412
- "rewards/chosen": 0.7471667528152466,
413
- "rewards/margins": 1.5588438510894775,
414
- "rewards/rejected": -0.811677098274231,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 0.63,
419
  "learning_rate": 3.6441193238179146e-07,
420
- "logits/chosen": 0.10170190036296844,
421
- "logits/rejected": 0.125459223985672,
422
- "logps/chosen": -276.04132080078125,
423
- "logps/rejected": -338.2930603027344,
424
- "loss": 0.3543,
425
- "rewards/accuracies": 0.737500011920929,
426
- "rewards/chosen": 0.7856809496879578,
427
- "rewards/margins": 1.5332136154174805,
428
- "rewards/rejected": -0.7475326061248779,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 0.65,
433
  "learning_rate": 3.295635076714144e-07,
434
- "logits/chosen": 0.10013142973184586,
435
- "logits/rejected": 0.12764233350753784,
436
- "logps/chosen": -281.08636474609375,
437
- "logps/rejected": -327.638916015625,
438
- "loss": 0.344,
439
- "rewards/accuracies": 0.706250011920929,
440
- "rewards/chosen": 0.7322741746902466,
441
- "rewards/margins": 1.5288127660751343,
442
- "rewards/rejected": -0.7965387105941772,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 0.67,
447
  "learning_rate": 2.956286778402226e-07,
448
- "logits/chosen": 0.04914706200361252,
449
- "logits/rejected": 0.19642756879329681,
450
- "logps/chosen": -297.6860046386719,
451
- "logps/rejected": -307.39886474609375,
452
- "loss": 0.3589,
453
- "rewards/accuracies": 0.7250000238418579,
454
- "rewards/chosen": 0.8972498774528503,
455
- "rewards/margins": 1.7571271657943726,
456
- "rewards/rejected": -0.8598772883415222,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 0.69,
461
  "learning_rate": 2.6278934458271996e-07,
462
- "logits/chosen": 0.10303878784179688,
463
- "logits/rejected": 0.18232768774032593,
464
- "logps/chosen": -279.687744140625,
465
- "logps/rejected": -322.4974060058594,
466
- "loss": 0.3554,
467
- "rewards/accuracies": 0.675000011920929,
468
- "rewards/chosen": 0.9540345072746277,
469
- "rewards/margins": 1.5413516759872437,
470
- "rewards/rejected": -0.5873170495033264,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 0.71,
475
  "learning_rate": 2.312215373764551e-07,
476
- "logits/chosen": 0.05921119451522827,
477
- "logits/rejected": 0.1563117355108261,
478
- "logps/chosen": -270.2222595214844,
479
- "logps/rejected": -273.39544677734375,
480
- "loss": 0.3482,
481
- "rewards/accuracies": 0.7749999761581421,
482
- "rewards/chosen": 0.8237099647521973,
483
- "rewards/margins": 1.4518331289291382,
484
- "rewards/rejected": -0.6281229853630066,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 0.73,
489
  "learning_rate": 2.0109446990692963e-07,
490
- "logits/chosen": 0.026022329926490784,
491
- "logits/rejected": 0.03612793609499931,
492
- "logps/chosen": -287.3631286621094,
493
- "logps/rejected": -323.0098876953125,
494
- "loss": 0.355,
495
- "rewards/accuracies": 0.643750011920929,
496
- "rewards/chosen": 0.7069499492645264,
497
- "rewards/margins": 1.3287115097045898,
498
- "rewards/rejected": -0.6217616200447083,
499
  "step": 350
500
  },
501
  {
502
  "epoch": 0.75,
503
  "learning_rate": 1.725696330273575e-07,
504
- "logits/chosen": -0.008826015517115593,
505
- "logits/rejected": 0.11012457311153412,
506
- "logps/chosen": -308.5582580566406,
507
- "logps/rejected": -291.039794921875,
508
- "loss": 0.3408,
509
- "rewards/accuracies": 0.737500011920929,
510
- "rewards/chosen": 0.9799526929855347,
511
- "rewards/margins": 1.4228074550628662,
512
- "rewards/rejected": -0.44285479187965393,
513
  "step": 360
514
  },
515
  {
516
  "epoch": 0.77,
517
  "learning_rate": 1.4579992911531496e-07,
518
- "logits/chosen": 0.011949884705245495,
519
- "logits/rejected": 0.10477302223443985,
520
- "logps/chosen": -325.74664306640625,
521
- "logps/rejected": -332.83526611328125,
522
- "loss": 0.3372,
523
- "rewards/accuracies": 0.7437499761581421,
524
- "rewards/chosen": 1.134242057800293,
525
- "rewards/margins": 2.2450954914093018,
526
- "rewards/rejected": -1.1108531951904297,
527
  "step": 370
528
  },
529
  {
530
  "epoch": 0.8,
531
  "learning_rate": 1.209288524664029e-07,
532
- "logits/chosen": 0.06705882400274277,
533
- "logits/rejected": 0.14220719039440155,
534
- "logps/chosen": -278.93212890625,
535
- "logps/rejected": -287.16900634765625,
536
- "loss": 0.3393,
537
- "rewards/accuracies": 0.6625000238418579,
538
- "rewards/chosen": 0.707054853439331,
539
- "rewards/margins": 1.4562907218933105,
540
- "rewards/rejected": -0.749235987663269,
541
  "step": 380
542
  },
543
  {
544
  "epoch": 0.82,
545
  "learning_rate": 9.808972011828054e-08,
546
- "logits/chosen": -0.041753821074962616,
547
- "logits/rejected": 0.10108913481235504,
548
- "logps/chosen": -319.83123779296875,
549
- "logps/rejected": -312.74029541015625,
550
- "loss": 0.3434,
551
- "rewards/accuracies": 0.731249988079071,
552
- "rewards/chosen": 1.0930083990097046,
553
- "rewards/margins": 1.6807419061660767,
554
- "rewards/rejected": -0.5877334475517273,
555
  "step": 390
556
  },
557
  {
558
  "epoch": 0.84,
559
  "learning_rate": 7.740495722810269e-08,
560
- "logits/chosen": 0.037683337926864624,
561
- "logits/rejected": 0.1319020837545395,
562
- "logps/chosen": -298.04736328125,
563
- "logps/rejected": -307.0213317871094,
564
- "loss": 0.3519,
565
- "rewards/accuracies": 0.75,
566
- "rewards/chosen": 0.9478852152824402,
567
- "rewards/margins": 1.5595645904541016,
568
- "rewards/rejected": -0.6116792559623718,
569
  "step": 400
570
  },
571
  {
572
  "epoch": 0.86,
573
  "learning_rate": 5.898544083397e-08,
574
- "logits/chosen": -0.02371780201792717,
575
- "logits/rejected": 0.03532714769244194,
576
- "logps/chosen": -273.40704345703125,
577
- "logps/rejected": -299.4648132324219,
578
- "loss": 0.3533,
579
- "rewards/accuracies": 0.625,
580
- "rewards/chosen": 0.789527416229248,
581
- "rewards/margins": 1.2326061725616455,
582
- "rewards/rejected": -0.44307881593704224,
583
  "step": 410
584
  },
585
  {
586
  "epoch": 0.88,
587
  "learning_rate": 4.292990551804171e-08,
588
- "logits/chosen": 0.07686875015497208,
589
- "logits/rejected": 0.1795254498720169,
590
- "logps/chosen": -273.1594543457031,
591
- "logps/rejected": -296.47528076171875,
592
- "loss": 0.3382,
593
- "rewards/accuracies": 0.762499988079071,
594
- "rewards/chosen": 1.008718729019165,
595
- "rewards/margins": 1.5497512817382812,
596
- "rewards/rejected": -0.5410324335098267,
597
  "step": 420
598
  },
599
  {
600
  "epoch": 0.9,
601
  "learning_rate": 2.9324414157151367e-08,
602
- "logits/chosen": 0.06547899544239044,
603
- "logits/rejected": 0.09760904312133789,
604
- "logps/chosen": -315.00506591796875,
605
- "logps/rejected": -338.0772399902344,
606
- "loss": 0.3499,
607
- "rewards/accuracies": 0.7124999761581421,
608
- "rewards/chosen": 0.9535905122756958,
609
- "rewards/margins": 1.6683721542358398,
610
- "rewards/rejected": -0.7147817015647888,
611
  "step": 430
612
  },
613
  {
614
  "epoch": 0.92,
615
  "learning_rate": 1.824189659787284e-08,
616
- "logits/chosen": 0.051959145814180374,
617
- "logits/rejected": 0.0797661691904068,
618
- "logps/chosen": -269.6096496582031,
619
- "logps/rejected": -288.70709228515625,
620
- "loss": 0.3456,
621
- "rewards/accuracies": 0.6499999761581421,
622
- "rewards/chosen": 0.7473762035369873,
623
- "rewards/margins": 1.2938742637634277,
624
- "rewards/rejected": -0.5464980006217957,
625
  "step": 440
626
  },
627
  {
628
  "epoch": 0.94,
629
  "learning_rate": 9.741758728888217e-09,
630
- "logits/chosen": 0.013958173803985119,
631
- "logits/rejected": 0.19162164628505707,
632
- "logps/chosen": -331.8248596191406,
633
- "logps/rejected": -298.86553955078125,
634
- "loss": 0.3412,
635
  "rewards/accuracies": 0.71875,
636
- "rewards/chosen": 0.9063283801078796,
637
- "rewards/margins": 1.6520798206329346,
638
- "rewards/rejected": -0.7457513809204102,
639
  "step": 450
640
  },
641
  {
642
  "epoch": 0.96,
643
  "learning_rate": 3.869564046156459e-09,
644
- "logits/chosen": 0.05520665645599365,
645
- "logits/rejected": 0.24095895886421204,
646
- "logps/chosen": -279.37713623046875,
647
- "logps/rejected": -279.66351318359375,
648
- "loss": 0.3525,
649
- "rewards/accuracies": 0.768750011920929,
650
- "rewards/chosen": 0.9280216097831726,
651
- "rewards/margins": 1.6233961582183838,
652
- "rewards/rejected": -0.6953743100166321,
653
  "step": 460
654
  },
655
  {
656
  "epoch": 0.98,
657
  "learning_rate": 6.567894177967325e-10,
658
- "logits/chosen": 0.008933846838772297,
659
- "logits/rejected": 0.1640961915254593,
660
- "logps/chosen": -319.6179504394531,
661
- "logps/rejected": -314.25787353515625,
662
- "loss": 0.3473,
663
- "rewards/accuracies": 0.7437499761581421,
664
- "rewards/chosen": 0.9765602946281433,
665
- "rewards/margins": 1.8153190612792969,
666
- "rewards/rejected": -0.8387589454650879,
667
  "step": 470
668
  },
669
  {
670
  "epoch": 1.0,
671
  "step": 477,
672
  "total_flos": 0.0,
673
- "train_loss": 0.37614710375947774,
674
- "train_runtime": 6361.5592,
675
- "train_samples_per_second": 9.61,
676
  "train_steps_per_second": 0.075
677
  }
678
  ],
 
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 2.0833333333333333e-07,
14
+ "logits/chosen": 0.17655496299266815,
15
+ "logits/rejected": 0.2531452775001526,
16
+ "logps/chosen": -354.29669189453125,
17
+ "logps/rejected": -305.259765625,
18
+ "loss": 0.5,
19
+ "rewards/accuracies": 0.4312500059604645,
20
+ "rewards/chosen": 0.0010361697059124708,
21
+ "rewards/margins": 0.0014542521676048636,
22
+ "rewards/rejected": -0.00041808263631537557,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.04,
27
  "learning_rate": 4.1666666666666667e-07,
28
+ "logits/chosen": 0.07140998542308807,
29
+ "logits/rejected": 0.19915328919887543,
30
+ "logps/chosen": -316.61407470703125,
31
+ "logps/rejected": -276.1783142089844,
32
+ "loss": 0.4997,
33
+ "rewards/accuracies": 0.5874999761581421,
34
+ "rewards/chosen": 0.001211934955790639,
35
+ "rewards/margins": 0.00264042429625988,
36
+ "rewards/rejected": -0.0014284893404692411,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.06,
41
  "learning_rate": 6.249999999999999e-07,
42
+ "logits/chosen": 0.1830858290195465,
43
+ "logits/rejected": 0.25493288040161133,
44
+ "logps/chosen": -294.3023376464844,
45
+ "logps/rejected": -298.47430419921875,
46
+ "loss": 0.4979,
47
+ "rewards/accuracies": 0.675000011920929,
48
+ "rewards/chosen": 0.00664560217410326,
49
+ "rewards/margins": 0.008408578112721443,
50
+ "rewards/rejected": -0.0017629768699407578,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.08,
55
  "learning_rate": 8.333333333333333e-07,
56
+ "logits/chosen": 0.1198926791548729,
57
+ "logits/rejected": 0.2388772964477539,
58
+ "logps/chosen": -343.3688659667969,
59
+ "logps/rejected": -318.56866455078125,
60
+ "loss": 0.4944,
61
+ "rewards/accuracies": 0.6000000238418579,
62
+ "rewards/chosen": 0.04203338176012039,
63
+ "rewards/margins": 0.023049216717481613,
64
+ "rewards/rejected": 0.01898416317999363,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 9.999463737538052e-07,
70
+ "logits/chosen": 0.19016575813293457,
71
+ "logits/rejected": 0.2768324613571167,
72
+ "logps/chosen": -305.9139709472656,
73
+ "logps/rejected": -285.70263671875,
74
+ "loss": 0.4888,
75
  "rewards/accuracies": 0.699999988079071,
76
+ "rewards/chosen": 0.0463864728808403,
77
+ "rewards/margins": 0.06659023463726044,
78
+ "rewards/rejected": -0.02020375430583954,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.13,
83
  "learning_rate": 9.980706626858607e-07,
84
+ "logits/chosen": 0.1583642065525055,
85
+ "logits/rejected": 0.2964373230934143,
86
+ "logps/chosen": -292.2091979980469,
87
+ "logps/rejected": -283.33062744140625,
88
+ "loss": 0.4823,
89
+ "rewards/accuracies": 0.6937500238418579,
90
+ "rewards/chosen": 0.006695735268294811,
91
+ "rewards/margins": 0.08554854989051819,
92
+ "rewards/rejected": -0.0788528248667717,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 9.935251313189563e-07,
98
+ "logits/chosen": 0.1668189913034439,
99
+ "logits/rejected": 0.25383955240249634,
100
+ "logps/chosen": -330.51483154296875,
101
+ "logps/rejected": -332.74249267578125,
102
+ "loss": 0.476,
103
+ "rewards/accuracies": 0.612500011920929,
104
+ "rewards/chosen": -0.007911129854619503,
105
+ "rewards/margins": 0.13003569841384888,
106
+ "rewards/rejected": -0.13794682919979095,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 9.86334145175542e-07,
112
+ "logits/chosen": 0.22892770171165466,
113
+ "logits/rejected": 0.32262876629829407,
114
+ "logps/chosen": -326.62847900390625,
115
+ "logps/rejected": -321.47064208984375,
116
+ "loss": 0.4678,
117
+ "rewards/accuracies": 0.7250000238418579,
118
+ "rewards/chosen": -0.07964827120304108,
119
+ "rewards/margins": 0.2643834054470062,
120
+ "rewards/rejected": -0.3440317213535309,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.19,
125
  "learning_rate": 9.765362502737097e-07,
126
+ "logits/chosen": 0.12489993870258331,
127
+ "logits/rejected": 0.2657889425754547,
128
+ "logps/chosen": -358.5821838378906,
129
+ "logps/rejected": -333.71466064453125,
130
+ "loss": 0.4612,
131
+ "rewards/accuracies": 0.6812499761581421,
132
+ "rewards/chosen": -0.21640650928020477,
133
+ "rewards/margins": 0.4499947130680084,
134
+ "rewards/rejected": -0.6664012670516968,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.21,
139
  "learning_rate": 9.641839665080363e-07,
140
+ "logits/chosen": 0.2374851256608963,
141
+ "logits/rejected": 0.4098134934902191,
142
+ "logps/chosen": -378.7792053222656,
143
+ "logps/rejected": -408.1399841308594,
144
+ "loss": 0.4512,
145
  "rewards/accuracies": 0.6312500238418579,
146
+ "rewards/chosen": -0.44217753410339355,
147
+ "rewards/margins": 0.715401291847229,
148
+ "rewards/rejected": -1.157578706741333,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.23,
153
  "learning_rate": 9.493435061259129e-07,
154
+ "logits/chosen": 0.29897215962409973,
155
+ "logits/rejected": 0.34014248847961426,
156
+ "logps/chosen": -395.0293884277344,
157
+ "logps/rejected": -461.2764587402344,
158
+ "loss": 0.4418,
159
+ "rewards/accuracies": 0.65625,
160
+ "rewards/chosen": -0.8240998983383179,
161
+ "rewards/margins": 0.7941638231277466,
162
+ "rewards/rejected": -1.618263602256775,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.25,
167
  "learning_rate": 9.320944188084241e-07,
168
+ "logits/chosen": 0.18543429672718048,
169
+ "logits/rejected": 0.282682329416275,
170
+ "logps/chosen": -440.6853942871094,
171
+ "logps/rejected": -526.3844604492188,
172
+ "loss": 0.4495,
173
+ "rewards/accuracies": 0.6499999761581421,
174
+ "rewards/chosen": -1.201317548751831,
175
+ "rewards/margins": 0.8505627512931824,
176
+ "rewards/rejected": -2.051880359649658,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.27,
181
  "learning_rate": 9.125291652582547e-07,
182
+ "logits/chosen": 0.10988249629735947,
183
+ "logits/rejected": 0.2532512843608856,
184
+ "logps/chosen": -429.30322265625,
185
+ "logps/rejected": -460.0655822753906,
186
+ "loss": 0.4407,
187
+ "rewards/accuracies": 0.6187499761581421,
188
+ "rewards/chosen": -0.9080715179443359,
189
+ "rewards/margins": 0.8440803289413452,
190
+ "rewards/rejected": -1.7521518468856812,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.29,
195
  "learning_rate": 8.90752621580335e-07,
196
+ "logits/chosen": 0.05259154364466667,
197
+ "logits/rejected": 0.20351815223693848,
198
+ "logps/chosen": -478.1226501464844,
199
+ "logps/rejected": -552.33154296875,
200
+ "loss": 0.4381,
201
+ "rewards/accuracies": 0.6000000238418579,
202
+ "rewards/chosen": -1.4607925415039062,
203
+ "rewards/margins": 1.3634538650512695,
204
+ "rewards/rejected": -2.8242461681365967,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.31,
209
  "learning_rate": 8.668815171119019e-07,
210
+ "logits/chosen": 0.1267194300889969,
211
+ "logits/rejected": 0.16065822541713715,
212
+ "logps/chosen": -432.47418212890625,
213
+ "logps/rejected": -556.4413452148438,
214
+ "loss": 0.4373,
215
+ "rewards/accuracies": 0.731249988079071,
216
+ "rewards/chosen": -0.9717355966567993,
217
+ "rewards/margins": 1.443182110786438,
218
+ "rewards/rejected": -2.4149177074432373,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.33,
223
  "learning_rate": 8.410438087153911e-07,
224
+ "logits/chosen": 0.05742305517196655,
225
+ "logits/rejected": 0.03335579112172127,
226
+ "logps/chosen": -386.4638366699219,
227
+ "logps/rejected": -537.6171264648438,
228
+ "loss": 0.4335,
229
+ "rewards/accuracies": 0.71875,
230
+ "rewards/chosen": -0.8440232276916504,
231
+ "rewards/margins": 1.7251598834991455,
232
+ "rewards/rejected": -2.569182872772217,
233
  "step": 160
234
  },
235
  {
236
  "epoch": 0.36,
237
  "learning_rate": 8.133779948881513e-07,
238
+ "logits/chosen": 0.04388447850942612,
239
+ "logits/rejected": 0.06478340178728104,
240
+ "logps/chosen": -450.94049072265625,
241
+ "logps/rejected": -571.2717895507812,
242
+ "loss": 0.4268,
243
  "rewards/accuracies": 0.706250011920929,
244
+ "rewards/chosen": -1.1457209587097168,
245
+ "rewards/margins": 1.4885038137435913,
246
+ "rewards/rejected": -2.6342251300811768,
247
  "step": 170
248
  },
249
  {
250
  "epoch": 0.38,
251
  "learning_rate": 7.840323733655778e-07,
252
+ "logits/chosen": 0.03801240772008896,
253
+ "logits/rejected": 0.0668804943561554,
254
+ "logps/chosen": -415.9105529785156,
255
+ "logps/rejected": -594.4246826171875,
256
+ "loss": 0.426,
257
+ "rewards/accuracies": 0.6937500238418579,
258
+ "rewards/chosen": -0.8987852931022644,
259
+ "rewards/margins": 2.0467095375061035,
260
+ "rewards/rejected": -2.9454948902130127,
261
  "step": 180
262
  },
263
  {
264
  "epoch": 0.4,
265
  "learning_rate": 7.531642461971514e-07,
266
+ "logits/chosen": 0.12394122779369354,
267
+ "logits/rejected": 0.07622597366571426,
268
+ "logps/chosen": -482.99774169921875,
269
+ "logps/rejected": -617.9317626953125,
270
+ "loss": 0.4148,
271
+ "rewards/accuracies": 0.7250000238418579,
272
+ "rewards/chosen": -1.4833831787109375,
273
+ "rewards/margins": 1.5686841011047363,
274
+ "rewards/rejected": -3.052067279815674,
275
  "step": 190
276
  },
277
  {
278
  "epoch": 0.42,
279
  "learning_rate": 7.209390765564318e-07,
280
+ "logits/chosen": 0.12547728419303894,
281
+ "logits/rejected": 0.039741553366184235,
282
+ "logps/chosen": -470.0662536621094,
283
+ "logps/rejected": -810.3030395507812,
284
+ "loss": 0.4152,
285
+ "rewards/accuracies": 0.762499988079071,
286
+ "rewards/chosen": -1.2957651615142822,
287
+ "rewards/margins": 3.8659985065460205,
288
+ "rewards/rejected": -5.1617631912231445,
289
  "step": 200
290
  },
291
  {
292
  "epoch": 0.44,
293
  "learning_rate": 6.875296018047809e-07,
294
+ "logits/chosen": 0.20153549313545227,
295
+ "logits/rejected": 0.1317548155784607,
296
+ "logps/chosen": -447.82562255859375,
297
+ "logps/rejected": -725.8985595703125,
298
+ "loss": 0.4249,
299
+ "rewards/accuracies": 0.7124999761581421,
300
+ "rewards/chosen": -1.3366836309432983,
301
+ "rewards/margins": 3.223564863204956,
302
+ "rewards/rejected": -4.560248374938965,
303
  "step": 210
304
  },
305
  {
306
  "epoch": 0.46,
307
  "learning_rate": 6.531149075630796e-07,
308
+ "logits/chosen": -0.017775116488337517,
309
+ "logits/rejected": 0.05367380380630493,
310
+ "logps/chosen": -476.78790283203125,
311
+ "logps/rejected": -663.9365844726562,
312
+ "loss": 0.4167,
313
+ "rewards/accuracies": 0.6875,
314
+ "rewards/chosen": -1.6010878086090088,
315
+ "rewards/margins": 2.401573419570923,
316
+ "rewards/rejected": -4.002661228179932,
317
  "step": 220
318
  },
319
  {
320
  "epoch": 0.48,
321
  "learning_rate": 6.178794677547137e-07,
322
+ "logits/chosen": 0.07326556742191315,
323
+ "logits/rejected": -0.006058653350919485,
324
+ "logps/chosen": -590.01123046875,
325
+ "logps/rejected": -870.9129028320312,
326
+ "loss": 0.4193,
327
+ "rewards/accuracies": 0.6625000238418579,
328
+ "rewards/chosen": -2.6394991874694824,
329
+ "rewards/margins": 3.238422393798828,
330
+ "rewards/rejected": -5.8779215812683105,
331
  "step": 230
332
  },
333
  {
334
  "epoch": 0.5,
335
  "learning_rate": 5.820121557655108e-07,
336
+ "logits/chosen": 0.13632330298423767,
337
+ "logits/rejected": 0.12085568904876709,
338
+ "logps/chosen": -450.1314392089844,
339
+ "logps/rejected": -587.374267578125,
340
+ "loss": 0.425,
341
+ "rewards/accuracies": 0.65625,
342
+ "rewards/chosen": -1.3817965984344482,
343
+ "rewards/margins": 1.4728713035583496,
344
+ "rewards/rejected": -2.854668140411377,
345
  "step": 240
346
  },
347
  {
348
  "epoch": 0.52,
349
  "learning_rate": 5.457052320211339e-07,
350
+ "logits/chosen": 0.09744735062122345,
351
+ "logits/rejected": -0.04311475530266762,
352
+ "logps/chosen": -561.7251586914062,
353
+ "logps/rejected": -1082.66064453125,
354
+ "loss": 0.4126,
355
+ "rewards/accuracies": 0.7437499761581421,
356
+ "rewards/chosen": -2.4967703819274902,
357
+ "rewards/margins": 5.509397029876709,
358
+ "rewards/rejected": -8.006166458129883,
359
  "step": 250
360
  },
361
  {
362
  "epoch": 0.54,
363
  "learning_rate": 5.091533134088387e-07,
364
+ "logits/chosen": 0.007685136049985886,
365
+ "logits/rejected": -0.026540469378232956,
366
+ "logps/chosen": -681.2808837890625,
367
+ "logps/rejected": -1102.198486328125,
368
+ "loss": 0.4237,
369
+ "rewards/accuracies": 0.612500011920929,
370
+ "rewards/chosen": -3.266371250152588,
371
+ "rewards/margins": 4.754992485046387,
372
+ "rewards/rejected": -8.021364212036133,
373
  "step": 260
374
  },
375
  {
376
  "epoch": 0.57,
377
  "learning_rate": 4.7255233006783624e-07,
378
+ "logits/chosen": 0.24146917462348938,
379
+ "logits/rejected": 0.05772332474589348,
380
+ "logps/chosen": -437.0887756347656,
381
+ "logps/rejected": -754.1742553710938,
382
+ "loss": 0.409,
383
  "rewards/accuracies": 0.7562500238418579,
384
+ "rewards/chosen": -1.0795494318008423,
385
+ "rewards/margins": 3.357706069946289,
386
+ "rewards/rejected": -4.437255859375,
387
  "step": 270
388
  },
389
  {
390
  "epoch": 0.59,
391
  "learning_rate": 4.3609847514019763e-07,
392
+ "logits/chosen": 0.15583154559135437,
393
+ "logits/rejected": -0.01679980382323265,
394
+ "logps/chosen": -622.4188232421875,
395
+ "logps/rejected": -1143.203857421875,
396
+ "loss": 0.4172,
397
+ "rewards/accuracies": 0.637499988079071,
398
+ "rewards/chosen": -3.2593586444854736,
399
+ "rewards/margins": 5.262009143829346,
400
+ "rewards/rejected": -8.521368980407715,
401
  "step": 280
402
  },
403
  {
404
  "epoch": 0.61,
405
  "learning_rate": 3.9998715311197783e-07,
406
+ "logits/chosen": 0.12384140491485596,
407
+ "logits/rejected": -0.03689634054899216,
408
+ "logps/chosen": -612.9854736328125,
409
+ "logps/rejected": -1161.8275146484375,
410
+ "loss": 0.4065,
411
+ "rewards/accuracies": 0.7124999761581421,
412
+ "rewards/chosen": -2.665544033050537,
413
+ "rewards/margins": 5.724797248840332,
414
+ "rewards/rejected": -8.390340805053711,
415
  "step": 290
416
  },
417
  {
418
  "epoch": 0.63,
419
  "learning_rate": 3.6441193238179146e-07,
420
+ "logits/chosen": 0.23247964680194855,
421
+ "logits/rejected": 0.08442293107509613,
422
+ "logps/chosen": -644.8258056640625,
423
+ "logps/rejected": -1333.277099609375,
424
+ "loss": 0.4067,
425
+ "rewards/accuracies": 0.7437499761581421,
426
+ "rewards/chosen": -3.5307083129882812,
427
+ "rewards/margins": 6.568638801574707,
428
+ "rewards/rejected": -10.099346160888672,
429
  "step": 300
430
  },
431
  {
432
  "epoch": 0.65,
433
  "learning_rate": 3.295635076714144e-07,
434
+ "logits/chosen": 0.21653930842876434,
435
+ "logits/rejected": -0.010667298920452595,
436
+ "logps/chosen": -576.2736206054688,
437
+ "logps/rejected": -1167.0555419921875,
438
+ "loss": 0.4003,
439
+ "rewards/accuracies": 0.7250000238418579,
440
+ "rewards/chosen": -2.805418014526367,
441
+ "rewards/margins": 5.748055458068848,
442
+ "rewards/rejected": -8.553472518920898,
443
  "step": 310
444
  },
445
  {
446
  "epoch": 0.67,
447
  "learning_rate": 2.956286778402226e-07,
448
+ "logits/chosen": 0.14956721663475037,
449
+ "logits/rejected": -0.00617391150444746,
450
+ "logps/chosen": -499.51556396484375,
451
+ "logps/rejected": -1073.225830078125,
452
+ "loss": 0.4081,
453
+ "rewards/accuracies": 0.731249988079071,
454
+ "rewards/chosen": -1.8388452529907227,
455
+ "rewards/margins": 5.99139928817749,
456
+ "rewards/rejected": -7.830244541168213,
457
  "step": 320
458
  },
459
  {
460
  "epoch": 0.69,
461
  "learning_rate": 2.6278934458271996e-07,
462
+ "logits/chosen": 0.20027479529380798,
463
+ "logits/rejected": 0.06552217900753021,
464
+ "logps/chosen": -461.4195861816406,
465
+ "logps/rejected": -1150.258544921875,
466
+ "loss": 0.4027,
467
+ "rewards/accuracies": 0.7437499761581421,
468
+ "rewards/chosen": -1.6265113353729248,
469
+ "rewards/margins": 6.768563270568848,
470
+ "rewards/rejected": -8.395073890686035,
471
  "step": 330
472
  },
473
  {
474
  "epoch": 0.71,
475
  "learning_rate": 2.312215373764551e-07,
476
+ "logits/chosen": 0.1772742122411728,
477
+ "logits/rejected": 0.058857548981904984,
478
+ "logps/chosen": -519.1689453125,
479
+ "logps/rejected": -1075.103759765625,
480
+ "loss": 0.4056,
481
+ "rewards/accuracies": 0.800000011920929,
482
+ "rewards/chosen": -2.324723720550537,
483
+ "rewards/margins": 5.8179826736450195,
484
+ "rewards/rejected": -8.142705917358398,
485
  "step": 340
486
  },
487
  {
488
  "epoch": 0.73,
489
  "learning_rate": 2.0109446990692963e-07,
490
+ "logits/chosen": 0.09322932362556458,
491
+ "logits/rejected": -0.021080341190099716,
492
+ "logps/chosen": -524.8082275390625,
493
+ "logps/rejected": -1263.429443359375,
494
+ "loss": 0.404,
495
+ "rewards/accuracies": 0.65625,
496
+ "rewards/chosen": -2.233060121536255,
497
+ "rewards/margins": 7.2954888343811035,
498
+ "rewards/rejected": -9.528549194335938,
499
  "step": 350
500
  },
501
  {
502
  "epoch": 0.75,
503
  "learning_rate": 1.725696330273575e-07,
504
+ "logits/chosen": 0.12329642474651337,
505
+ "logits/rejected": -0.045363299548625946,
506
+ "logps/chosen": -477.84747314453125,
507
+ "logps/rejected": -1159.287353515625,
508
+ "loss": 0.3987,
509
+ "rewards/accuracies": 0.71875,
510
+ "rewards/chosen": -1.4969019889831543,
511
+ "rewards/margins": 7.274144172668457,
512
+ "rewards/rejected": -8.77104663848877,
513
  "step": 360
514
  },
515
  {
516
  "epoch": 0.77,
517
  "learning_rate": 1.4579992911531496e-07,
518
+ "logits/chosen": 0.13813820481300354,
519
+ "logits/rejected": 0.06726070493459702,
520
+ "logps/chosen": -596.8673706054688,
521
+ "logps/rejected": -1229.910888671875,
522
+ "loss": 0.3989,
523
+ "rewards/accuracies": 0.731249988079071,
524
+ "rewards/chosen": -2.484358787536621,
525
+ "rewards/margins": 6.708567142486572,
526
+ "rewards/rejected": -9.192926406860352,
527
  "step": 370
528
  },
529
  {
530
  "epoch": 0.8,
531
  "learning_rate": 1.209288524664029e-07,
532
+ "logits/chosen": 0.2262219935655594,
533
+ "logits/rejected": 0.04883592948317528,
534
+ "logps/chosen": -571.9241333007812,
535
+ "logps/rejected": -1147.636474609375,
536
+ "loss": 0.3965,
537
+ "rewards/accuracies": 0.699999988079071,
538
+ "rewards/chosen": -2.7885093688964844,
539
+ "rewards/margins": 5.966012954711914,
540
+ "rewards/rejected": -8.754522323608398,
541
  "step": 380
542
  },
543
  {
544
  "epoch": 0.82,
545
  "learning_rate": 9.808972011828054e-08,
546
+ "logits/chosen": 0.13919615745544434,
547
+ "logits/rejected": 0.08005174249410629,
548
+ "logps/chosen": -603.2689208984375,
549
+ "logps/rejected": -1278.978271484375,
550
+ "loss": 0.3993,
551
+ "rewards/accuracies": 0.75,
552
+ "rewards/chosen": -2.6157753467559814,
553
+ "rewards/margins": 7.164151668548584,
554
+ "rewards/rejected": -9.779927253723145,
555
  "step": 390
556
  },
557
  {
558
  "epoch": 0.84,
559
  "learning_rate": 7.740495722810269e-08,
560
+ "logits/chosen": 0.1855761706829071,
561
+ "logits/rejected": 0.03339262679219246,
562
+ "logps/chosen": -554.6050415039062,
563
+ "logps/rejected": -1247.11474609375,
564
+ "loss": 0.4064,
565
+ "rewards/accuracies": 0.7437499761581421,
566
+ "rewards/chosen": -2.375999927520752,
567
+ "rewards/margins": 7.147269248962402,
568
+ "rewards/rejected": -9.523270606994629,
569
  "step": 400
570
  },
571
  {
572
  "epoch": 0.86,
573
  "learning_rate": 5.898544083397e-08,
574
+ "logits/chosen": 0.10612723976373672,
575
+ "logits/rejected": -0.03204170614480972,
576
+ "logps/chosen": -598.8375244140625,
577
+ "logps/rejected": -1218.921142578125,
578
+ "loss": 0.4009,
579
+ "rewards/accuracies": 0.6625000238418579,
580
+ "rewards/chosen": -3.0963997840881348,
581
+ "rewards/margins": 6.186778545379639,
582
+ "rewards/rejected": -9.283178329467773,
583
  "step": 410
584
  },
585
  {
586
  "epoch": 0.88,
587
  "learning_rate": 4.292990551804171e-08,
588
+ "logits/chosen": 0.3134514391422272,
589
+ "logits/rejected": 0.1133495420217514,
590
+ "logps/chosen": -560.297607421875,
591
+ "logps/rejected": -1385.083251953125,
592
+ "loss": 0.3991,
593
+ "rewards/accuracies": 0.699999988079071,
594
+ "rewards/chosen": -2.669637680053711,
595
+ "rewards/margins": 8.3246488571167,
596
+ "rewards/rejected": -10.994285583496094,
597
  "step": 420
598
  },
599
  {
600
  "epoch": 0.9,
601
  "learning_rate": 2.9324414157151367e-08,
602
+ "logits/chosen": 0.14708609879016876,
603
+ "logits/rejected": 0.05113764852285385,
604
+ "logps/chosen": -646.3408203125,
605
+ "logps/rejected": -1521.79345703125,
606
+ "loss": 0.3999,
607
+ "rewards/accuracies": 0.75,
608
+ "rewards/chosen": -3.122638702392578,
609
+ "rewards/margins": 8.8574800491333,
610
+ "rewards/rejected": -11.980117797851562,
611
  "step": 430
612
  },
613
  {
614
  "epoch": 0.92,
615
  "learning_rate": 1.824189659787284e-08,
616
+ "logits/chosen": 0.19891302287578583,
617
+ "logits/rejected": 0.057393454015254974,
618
+ "logps/chosen": -530.86865234375,
619
+ "logps/rejected": -1372.778076171875,
620
+ "loss": 0.3979,
621
+ "rewards/accuracies": 0.6875,
622
+ "rewards/chosen": -2.4631145000457764,
623
+ "rewards/margins": 8.486894607543945,
624
+ "rewards/rejected": -10.950007438659668,
625
  "step": 440
626
  },
627
  {
628
  "epoch": 0.94,
629
  "learning_rate": 9.741758728888217e-09,
630
+ "logits/chosen": 0.20876403152942657,
631
+ "logits/rejected": 0.052755843847990036,
632
+ "logps/chosen": -683.3274536132812,
633
+ "logps/rejected": -1404.552978515625,
634
+ "loss": 0.3915,
635
  "rewards/accuracies": 0.71875,
636
+ "rewards/chosen": -3.3337600231170654,
637
+ "rewards/margins": 7.872265815734863,
638
+ "rewards/rejected": -11.206026077270508,
639
  "step": 450
640
  },
641
  {
642
  "epoch": 0.96,
643
  "learning_rate": 3.869564046156459e-09,
644
+ "logits/chosen": 0.2985457181930542,
645
+ "logits/rejected": 0.15650448203086853,
646
+ "logps/chosen": -468.8932189941406,
647
+ "logps/rejected": -1197.56201171875,
648
+ "loss": 0.3987,
649
+ "rewards/accuracies": 0.699999988079071,
650
+ "rewards/chosen": -1.7095565795898438,
651
+ "rewards/margins": 7.608504295349121,
652
+ "rewards/rejected": -9.318059921264648,
653
  "step": 460
654
  },
655
  {
656
  "epoch": 0.98,
657
  "learning_rate": 6.567894177967325e-10,
658
+ "logits/chosen": 0.17393910884857178,
659
+ "logits/rejected": 0.02789122983813286,
660
+ "logps/chosen": -607.3438720703125,
661
+ "logps/rejected": -1505.235595703125,
662
+ "loss": 0.3978,
663
+ "rewards/accuracies": 0.768750011920929,
664
+ "rewards/chosen": -2.6819469928741455,
665
+ "rewards/margins": 9.39558219909668,
666
+ "rewards/rejected": -12.07752799987793,
667
  "step": 470
668
  },
669
  {
670
  "epoch": 1.0,
671
  "step": 477,
672
  "total_flos": 0.0,
673
+ "train_loss": 0.42718374404267445,
674
+ "train_runtime": 6325.1171,
675
+ "train_samples_per_second": 9.665,
676
  "train_steps_per_second": 0.075
677
  }
678
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cec4624b9c095040eb8aa52a9ba592de199b303541db109644b3cf58d7c369c4
3
  size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b021e642df60d4e058f25e7642bbe07c40ea3fe4bd81ba446202dbd4f17079f4
3
  size 6648