wzhouad commited on
Commit
939d930
1 Parent(s): 124d1d4

Model save

Browse files
README.md CHANGED
@@ -37,7 +37,7 @@ The following hyperparameters were used during training:
37
  - learning_rate: 5e-07
38
  - train_batch_size: 8
39
  - eval_batch_size: 8
40
- - seed: 1
41
  - distributed_type: multi-GPU
42
  - num_devices: 8
43
  - gradient_accumulation_steps: 2
 
37
  - learning_rate: 5e-07
38
  - train_batch_size: 8
39
  - eval_batch_size: 8
40
+ - seed: 2
41
  - distributed_type: multi-GPU
42
  - num_devices: 8
43
  - gradient_accumulation_steps: 2
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.1290441479947832,
4
- "train_runtime": 3205.4322,
5
  "train_samples": 51894,
6
- "train_samples_per_second": 16.189,
7
  "train_steps_per_second": 0.126
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.13281457475674005,
4
+ "train_runtime": 3219.0486,
5
  "train_samples": 51894,
6
+ "train_samples_per_second": 16.121,
7
  "train_steps_per_second": 0.126
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63eebf66ef032e0273cefa649145ef22ac9d7c7a15a63949bfab2dd82eea3b99
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab46e8124d9cacc43920ed6943b79f01ec0d55f715b8588dba66c609a2b1d4b
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a18853d43d98a4783811f48aabfdff6a22b3723a0bf973d97dcc5d02e52f01f3
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d07be1111d3b5bc2c14ffdaf4371c594ea83fb6a105d7bbceaeeb9d43d3f17b
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b07e126a50b2f207cc01c4d63a5c95af42502791e0e97926155fd6c343bfe98
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74f1ed244a41d859eb13abcea7c5eee6a043a8a2c39a674f91dbbfc0cf7e2a35
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.1290441479947832,
4
- "train_runtime": 3205.4322,
5
  "train_samples": 51894,
6
- "train_samples_per_second": 16.189,
7
  "train_steps_per_second": 0.126
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.13281457475674005,
4
+ "train_runtime": 3219.0486,
5
  "train_samples": 51894,
6
+ "train_samples_per_second": 16.121,
7
  "train_steps_per_second": 0.126
8
  }
trainer_state.json CHANGED
@@ -11,11 +11,11 @@
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.2195121951219512e-08,
14
- "logits/chosen": -2.769179582595825,
15
- "logits/rejected": -2.6384379863739014,
16
- "logps/chosen": -511.7125244140625,
17
- "logps/rejected": -154.70135498046875,
18
- "loss": 0.4155,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
@@ -25,570 +25,570 @@
25
  {
26
  "epoch": 0.02,
27
  "learning_rate": 1.219512195121951e-07,
28
- "logits/chosen": -2.7492480278015137,
29
- "logits/rejected": -2.7196333408355713,
30
- "logps/chosen": -350.17138671875,
31
- "logps/rejected": -113.88729095458984,
32
- "loss": 0.427,
33
- "rewards/accuracies": 0.5833333134651184,
34
- "rewards/chosen": 0.0007148745935410261,
35
- "rewards/margins": 0.0013887248933315277,
36
- "rewards/rejected": -0.0006738504162058234,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.05,
41
  "learning_rate": 2.439024390243902e-07,
42
- "logits/chosen": -2.73565936088562,
43
- "logits/rejected": -2.697619676589966,
44
- "logps/chosen": -417.7752990722656,
45
- "logps/rejected": -126.06368255615234,
46
- "loss": 0.4206,
47
- "rewards/accuracies": 0.7749999761581421,
48
- "rewards/chosen": 0.016129938885569572,
49
- "rewards/margins": 0.031221503391861916,
50
- "rewards/rejected": -0.015091565437614918,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 3.6585365853658536e-07,
56
- "logits/chosen": -2.701590061187744,
57
- "logits/rejected": -2.6967225074768066,
58
- "logps/chosen": -394.0975646972656,
59
- "logps/rejected": -112.23805236816406,
60
- "loss": 0.4044,
61
- "rewards/accuracies": 0.768750011920929,
62
- "rewards/chosen": 0.06890617311000824,
63
- "rewards/margins": 0.18525615334510803,
64
- "rewards/rejected": -0.1163499727845192,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 4.878048780487804e-07,
70
- "logits/chosen": -2.5483059883117676,
71
- "logits/rejected": -2.5507187843322754,
72
- "logps/chosen": -388.41693115234375,
73
- "logps/rejected": -163.46432495117188,
74
- "loss": 0.3829,
75
- "rewards/accuracies": 0.75,
76
- "rewards/chosen": 0.02780727483332157,
77
- "rewards/margins": 0.41306740045547485,
78
- "rewards/rejected": -0.38526007533073425,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.12,
83
  "learning_rate": 4.992461696250783e-07,
84
- "logits/chosen": -2.425833225250244,
85
- "logits/rejected": -2.388296604156494,
86
- "logps/chosen": -441.8356018066406,
87
- "logps/rejected": -209.2742156982422,
88
- "loss": 0.3064,
89
- "rewards/accuracies": 0.8187500238418579,
90
- "rewards/chosen": -0.09178180992603302,
91
- "rewards/margins": 0.8215526342391968,
92
- "rewards/rejected": -0.9133344888687134,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 4.966461721767899e-07,
98
- "logits/chosen": -2.3990914821624756,
99
- "logits/rejected": -2.3541452884674072,
100
- "logps/chosen": -423.5923767089844,
101
- "logps/rejected": -237.95361328125,
102
- "loss": 0.2477,
103
- "rewards/accuracies": 0.793749988079071,
104
- "rewards/chosen": -0.30225870013237,
105
- "rewards/margins": 1.0505648851394653,
106
- "rewards/rejected": -1.3528234958648682,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 4.922100518015975e-07,
112
- "logits/chosen": -2.414358615875244,
113
- "logits/rejected": -2.3809196949005127,
114
- "logps/chosen": -453.24755859375,
115
- "logps/rejected": -317.23309326171875,
116
- "loss": 0.1919,
117
- "rewards/accuracies": 0.7749999761581421,
118
- "rewards/chosen": -0.5829191207885742,
119
- "rewards/margins": 1.3952158689498901,
120
- "rewards/rejected": -1.978135108947754,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.2,
125
  "learning_rate": 4.859708325770919e-07,
126
- "logits/chosen": -2.3892292976379395,
127
- "logits/rejected": -2.3620591163635254,
128
- "logps/chosen": -466.544921875,
129
- "logps/rejected": -345.60174560546875,
130
- "loss": 0.1845,
131
- "rewards/accuracies": 0.7250000238418579,
132
- "rewards/chosen": -0.6911368370056152,
133
- "rewards/margins": 1.3688609600067139,
134
- "rewards/rejected": -2.05999755859375,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.22,
139
  "learning_rate": 4.779749614980225e-07,
140
- "logits/chosen": -2.3470118045806885,
141
- "logits/rejected": -2.2671432495117188,
142
- "logps/chosen": -550.2689208984375,
143
- "logps/rejected": -383.6993713378906,
144
- "loss": 0.1375,
145
- "rewards/accuracies": 0.824999988079071,
146
- "rewards/chosen": -0.528198778629303,
147
- "rewards/margins": 2.0589098930358887,
148
- "rewards/rejected": -2.587108612060547,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.25,
153
  "learning_rate": 4.682819627081427e-07,
154
- "logits/chosen": -2.359837293624878,
155
- "logits/rejected": -2.2484238147735596,
156
- "logps/chosen": -502.87939453125,
157
- "logps/rejected": -352.00054931640625,
158
- "loss": 0.1565,
159
- "rewards/accuracies": 0.875,
160
- "rewards/chosen": -0.47763410210609436,
161
- "rewards/margins": 1.936570167541504,
162
- "rewards/rejected": -2.4142043590545654,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 0.27,
167
  "learning_rate": 4.569639943810477e-07,
168
- "logits/chosen": -2.303628444671631,
169
- "logits/rejected": -2.1916663646698,
170
- "logps/chosen": -495.01739501953125,
171
- "logps/rejected": -347.0151062011719,
172
- "loss": 0.122,
173
- "rewards/accuracies": 0.8187500238418579,
174
- "rewards/chosen": -0.5454806685447693,
175
- "rewards/margins": 1.8509422540664673,
176
- "rewards/rejected": -2.396422863006592,
177
  "step": 110
178
  },
179
  {
180
  "epoch": 0.3,
181
  "learning_rate": 4.4410531154874543e-07,
182
- "logits/chosen": -2.3315651416778564,
183
- "logits/rejected": -2.2395036220550537,
184
- "logps/chosen": -450.09100341796875,
185
- "logps/rejected": -360.4415588378906,
186
- "loss": 0.1182,
187
- "rewards/accuracies": 0.824999988079071,
188
- "rewards/chosen": -0.5643357038497925,
189
- "rewards/margins": 1.957601547241211,
190
- "rewards/rejected": -2.521937131881714,
191
  "step": 120
192
  },
193
  {
194
  "epoch": 0.32,
195
  "learning_rate": 4.298016388768561e-07,
196
- "logits/chosen": -2.3046891689300537,
197
- "logits/rejected": -2.1809306144714355,
198
- "logps/chosen": -464.0572814941406,
199
- "logps/rejected": -396.9075622558594,
200
- "loss": 0.1128,
201
- "rewards/accuracies": 0.862500011920929,
202
- "rewards/chosen": -0.93115234375,
203
- "rewards/margins": 1.978463888168335,
204
- "rewards/rejected": -2.909615993499756,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 0.35,
209
  "learning_rate": 4.1415945805573005e-07,
210
- "logits/chosen": -2.364520311355591,
211
- "logits/rejected": -2.2930567264556885,
212
- "logps/chosen": -455.96771240234375,
213
- "logps/rejected": -342.2510986328125,
214
- "loss": 0.1463,
215
- "rewards/accuracies": 0.8062499761581421,
216
- "rewards/chosen": -0.5802963972091675,
217
- "rewards/margins": 1.6709789037704468,
218
- "rewards/rejected": -2.2512753009796143,
219
  "step": 140
220
  },
221
  {
222
  "epoch": 0.37,
223
  "learning_rate": 3.972952151123984e-07,
224
- "logits/chosen": -2.3566527366638184,
225
- "logits/rejected": -2.2735893726348877,
226
- "logps/chosen": -478.54693603515625,
227
- "logps/rejected": -365.7146911621094,
228
- "loss": 0.1316,
229
- "rewards/accuracies": 0.8062499761581421,
230
- "rewards/chosen": -0.9140907526016235,
231
- "rewards/margins": 1.685943365097046,
232
- "rewards/rejected": -2.600034236907959,
233
  "step": 150
234
  },
235
  {
236
  "epoch": 0.39,
237
  "learning_rate": 3.793344535444142e-07,
238
- "logits/chosen": -2.2995688915252686,
239
- "logits/rejected": -2.1954402923583984,
240
- "logps/chosen": -570.6788330078125,
241
- "logps/rejected": -437.27130126953125,
242
- "loss": 0.0968,
243
- "rewards/accuracies": 0.8374999761581421,
244
- "rewards/chosen": -1.210545301437378,
245
- "rewards/margins": 1.947977066040039,
246
- "rewards/rejected": -3.158522367477417,
247
  "step": 160
248
  },
249
  {
250
  "epoch": 0.42,
251
  "learning_rate": 3.604108797288461e-07,
252
- "logits/chosen": -2.310495615005493,
253
- "logits/rejected": -2.227719306945801,
254
- "logps/chosen": -477.73968505859375,
255
- "logps/rejected": -394.82891845703125,
256
- "loss": 0.1053,
257
- "rewards/accuracies": 0.84375,
258
- "rewards/chosen": -1.0732296705245972,
259
- "rewards/margins": 1.7702564001083374,
260
- "rewards/rejected": -2.8434860706329346,
261
  "step": 170
262
  },
263
  {
264
  "epoch": 0.44,
265
  "learning_rate": 3.40665367563858e-07,
266
- "logits/chosen": -2.3077213764190674,
267
- "logits/rejected": -2.2177042961120605,
268
- "logps/chosen": -518.1863403320312,
269
- "logps/rejected": -416.206787109375,
270
- "loss": 0.0817,
271
- "rewards/accuracies": 0.8187500238418579,
272
- "rewards/chosen": -0.9166792631149292,
273
- "rewards/margins": 2.109100341796875,
274
- "rewards/rejected": -3.0257794857025146,
275
  "step": 180
276
  },
277
  {
278
  "epoch": 0.47,
279
  "learning_rate": 3.202449097526798e-07,
280
- "logits/chosen": -2.2818238735198975,
281
- "logits/rejected": -2.187276840209961,
282
- "logps/chosen": -522.6356201171875,
283
- "logps/rejected": -490.2731018066406,
284
- "loss": 0.067,
285
- "rewards/accuracies": 0.856249988079071,
286
- "rewards/chosen": -1.4796288013458252,
287
- "rewards/margins": 2.3061554431915283,
288
- "rewards/rejected": -3.7857837677001953,
289
  "step": 190
290
  },
291
  {
292
  "epoch": 0.49,
293
  "learning_rate": 2.993015235369905e-07,
294
- "logits/chosen": -2.2447495460510254,
295
- "logits/rejected": -2.1707584857940674,
296
- "logps/chosen": -549.8089599609375,
297
- "logps/rejected": -516.205078125,
298
- "loss": 0.0611,
299
- "rewards/accuracies": 0.78125,
300
- "rewards/chosen": -1.6697288751602173,
301
- "rewards/margins": 2.298145055770874,
302
- "rewards/rejected": -3.9678738117218018,
303
  "step": 200
304
  },
305
  {
306
  "epoch": 0.52,
307
  "learning_rate": 2.7799111902582693e-07,
308
- "logits/chosen": -2.321493625640869,
309
- "logits/rejected": -2.215440511703491,
310
- "logps/chosen": -575.6776123046875,
311
- "logps/rejected": -493.73272705078125,
312
- "loss": 0.0744,
313
- "rewards/accuracies": 0.831250011920929,
314
- "rewards/chosen": -1.159621000289917,
315
- "rewards/margins": 2.540497303009033,
316
- "rewards/rejected": -3.70011830329895,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 0.54,
321
  "learning_rate": 2.564723385445869e-07,
322
- "logits/chosen": -2.2707631587982178,
323
- "logits/rejected": -2.2030460834503174,
324
- "logps/chosen": -501.503173828125,
325
- "logps/rejected": -449.3731384277344,
326
- "loss": 0.08,
327
- "rewards/accuracies": 0.8062499761581421,
328
- "rewards/chosen": -1.1590577363967896,
329
- "rewards/margins": 2.1603972911834717,
330
- "rewards/rejected": -3.3194549083709717,
331
  "step": 220
332
  },
333
  {
334
  "epoch": 0.57,
335
  "learning_rate": 2.3490537564442845e-07,
336
- "logits/chosen": -2.287022352218628,
337
- "logits/rejected": -2.1958324909210205,
338
- "logps/chosen": -529.8822631835938,
339
- "logps/rejected": -451.2649841308594,
340
- "loss": 0.089,
341
- "rewards/accuracies": 0.8500000238418579,
342
- "rewards/chosen": -1.0163233280181885,
343
- "rewards/margins": 2.2578654289245605,
344
- "rewards/rejected": -3.274188995361328,
345
  "step": 230
346
  },
347
  {
348
  "epoch": 0.59,
349
  "learning_rate": 2.1345078256378801e-07,
350
- "logits/chosen": -2.2853665351867676,
351
- "logits/rejected": -2.1983203887939453,
352
- "logps/chosen": -590.9471435546875,
353
- "logps/rejected": -492.50372314453125,
354
- "loss": 0.0612,
355
- "rewards/accuracies": 0.8187500238418579,
356
- "rewards/chosen": -1.4521348476409912,
357
- "rewards/margins": 2.383617401123047,
358
- "rewards/rejected": -3.835752487182617,
359
  "step": 240
360
  },
361
  {
362
  "epoch": 0.62,
363
  "learning_rate": 1.9226827501969865e-07,
364
- "logits/chosen": -2.2573533058166504,
365
- "logits/rejected": -2.1343369483947754,
366
- "logps/chosen": -548.763427734375,
367
- "logps/rejected": -487.57623291015625,
368
- "loss": 0.0627,
369
- "rewards/accuracies": 0.862500011920929,
370
- "rewards/chosen": -1.417848825454712,
371
- "rewards/margins": 2.380465030670166,
372
- "rewards/rejected": -3.798313856124878,
373
  "step": 250
374
  },
375
  {
376
  "epoch": 0.64,
377
  "learning_rate": 1.715155432264775e-07,
378
- "logits/chosen": -2.249309539794922,
379
- "logits/rejected": -2.155247688293457,
380
- "logps/chosen": -520.6065063476562,
381
- "logps/rejected": -490.1532287597656,
382
- "loss": 0.0677,
383
- "rewards/accuracies": 0.824999988079071,
384
- "rewards/chosen": -1.445723295211792,
385
- "rewards/margins": 2.3194212913513184,
386
- "rewards/rejected": -3.7651443481445312,
387
  "step": 260
388
  },
389
  {
390
  "epoch": 0.67,
391
  "learning_rate": 1.51347077992983e-07,
392
- "logits/chosen": -2.260199546813965,
393
- "logits/rejected": -2.167729616165161,
394
- "logps/chosen": -552.5299072265625,
395
- "logps/rejected": -480.06475830078125,
396
- "loss": 0.07,
397
- "rewards/accuracies": 0.90625,
398
- "rewards/chosen": -1.2369455099105835,
399
- "rewards/margins": 2.399855375289917,
400
- "rewards/rejected": -3.6368002891540527,
401
  "step": 270
402
  },
403
  {
404
  "epoch": 0.69,
405
  "learning_rate": 1.3191302063739906e-07,
406
- "logits/chosen": -2.2963688373565674,
407
- "logits/rejected": -2.2376794815063477,
408
- "logps/chosen": -517.3768310546875,
409
- "logps/rejected": -425.8411560058594,
410
- "loss": 0.07,
411
- "rewards/accuracies": 0.84375,
412
- "rewards/chosen": -1.1372196674346924,
413
- "rewards/margins": 2.0433435440063477,
414
- "rewards/rejected": -3.180562973022461,
415
  "step": 280
416
  },
417
  {
418
  "epoch": 0.72,
419
  "learning_rate": 1.1335804528119475e-07,
420
- "logits/chosen": -2.256326913833618,
421
- "logits/rejected": -2.1535654067993164,
422
- "logps/chosen": -577.1251220703125,
423
- "logps/rejected": -531.5836791992188,
424
- "loss": 0.0683,
425
- "rewards/accuracies": 0.875,
426
- "rewards/chosen": -1.3688971996307373,
427
- "rewards/margins": 2.631861448287964,
428
- "rewards/rejected": -4.000759124755859,
429
  "step": 290
430
  },
431
  {
432
  "epoch": 0.74,
433
  "learning_rate": 9.582028184286423e-08,
434
- "logits/chosen": -2.202509880065918,
435
- "logits/rejected": -2.116055727005005,
436
- "logps/chosen": -553.2191772460938,
437
- "logps/rejected": -521.0760498046875,
438
- "loss": 0.0577,
439
- "rewards/accuracies": 0.8062499761581421,
440
- "rewards/chosen": -1.7238006591796875,
441
- "rewards/margins": 2.396298885345459,
442
- "rewards/rejected": -4.1200995445251465,
443
  "step": 300
444
  },
445
  {
446
  "epoch": 0.76,
447
  "learning_rate": 7.943028774907065e-08,
448
- "logits/chosen": -2.257730484008789,
449
- "logits/rejected": -2.1503612995147705,
450
- "logps/chosen": -599.2135009765625,
451
- "logps/rejected": -497.50384521484375,
452
- "loss": 0.0563,
453
- "rewards/accuracies": 0.84375,
454
- "rewards/chosen": -1.4197520017623901,
455
- "rewards/margins": 2.4658217430114746,
456
- "rewards/rejected": -3.885573625564575,
457
  "step": 310
458
  },
459
  {
460
  "epoch": 0.79,
461
  "learning_rate": 6.431007601814637e-08,
462
- "logits/chosen": -2.226001262664795,
463
- "logits/rejected": -2.1242711544036865,
464
- "logps/chosen": -561.7794799804688,
465
- "logps/rejected": -525.1495361328125,
466
- "loss": 0.0589,
467
- "rewards/accuracies": 0.8500000238418579,
468
- "rewards/chosen": -1.6232330799102783,
469
- "rewards/margins": 2.387523889541626,
470
- "rewards/rejected": -4.010756969451904,
471
  "step": 320
472
  },
473
  {
474
  "epoch": 0.81,
475
  "learning_rate": 5.0572206951246e-08,
476
- "logits/chosen": -2.2513046264648438,
477
- "logits/rejected": -2.1654863357543945,
478
- "logps/chosen": -513.5191650390625,
479
- "logps/rejected": -491.4129333496094,
480
- "loss": 0.0596,
481
- "rewards/accuracies": 0.8125,
482
- "rewards/chosen": -1.6228710412979126,
483
- "rewards/margins": 2.1468024253845215,
484
- "rewards/rejected": -3.7696735858917236,
485
  "step": 330
486
  },
487
  {
488
  "epoch": 0.84,
489
  "learning_rate": 3.831895019292897e-08,
490
- "logits/chosen": -2.284156084060669,
491
- "logits/rejected": -2.184755325317383,
492
- "logps/chosen": -595.8388061523438,
493
- "logps/rejected": -532.6061401367188,
494
- "loss": 0.0584,
495
- "rewards/accuracies": 0.862500011920929,
496
- "rewards/chosen": -1.4855483770370483,
497
- "rewards/margins": 2.6151788234710693,
498
- "rewards/rejected": -4.100727558135986,
499
  "step": 340
500
  },
501
  {
502
  "epoch": 0.86,
503
  "learning_rate": 2.764152339909756e-08,
504
- "logits/chosen": -2.272153377532959,
505
- "logits/rejected": -2.180081844329834,
506
- "logps/chosen": -596.3790283203125,
507
- "logps/rejected": -511.85760498046875,
508
- "loss": 0.0692,
509
- "rewards/accuracies": 0.831250011920929,
510
- "rewards/chosen": -1.242677092552185,
511
- "rewards/margins": 2.682408094406128,
512
- "rewards/rejected": -3.9250850677490234,
513
  "step": 350
514
  },
515
  {
516
  "epoch": 0.89,
517
  "learning_rate": 1.861941317991664e-08,
518
- "logits/chosen": -2.1758205890655518,
519
- "logits/rejected": -2.1184608936309814,
520
- "logps/chosen": -529.5315551757812,
521
- "logps/rejected": -503.5777893066406,
522
- "loss": 0.0542,
523
- "rewards/accuracies": 0.8125,
524
- "rewards/chosen": -1.6654908657073975,
525
- "rewards/margins": 2.209399700164795,
526
- "rewards/rejected": -3.8748905658721924,
527
  "step": 360
528
  },
529
  {
530
  "epoch": 0.91,
531
  "learning_rate": 1.13197833728636e-08,
532
- "logits/chosen": -2.2538328170776367,
533
- "logits/rejected": -2.175743818283081,
534
- "logps/chosen": -569.5551147460938,
535
- "logps/rejected": -514.572509765625,
536
- "loss": 0.0603,
537
- "rewards/accuracies": 0.8187500238418579,
538
- "rewards/chosen": -1.7610028982162476,
539
- "rewards/margins": 2.1565301418304443,
540
- "rewards/rejected": -3.9175331592559814,
541
  "step": 370
542
  },
543
  {
544
  "epoch": 0.94,
545
  "learning_rate": 5.79697505093521e-09,
546
- "logits/chosen": -2.308776378631592,
547
- "logits/rejected": -2.193223237991333,
548
- "logps/chosen": -585.7466430664062,
549
- "logps/rejected": -526.7449340820312,
550
- "loss": 0.0563,
551
- "rewards/accuracies": 0.875,
552
- "rewards/chosen": -1.3321974277496338,
553
- "rewards/margins": 2.728519916534424,
554
- "rewards/rejected": -4.0607171058654785,
555
  "step": 380
556
  },
557
  {
558
  "epoch": 0.96,
559
  "learning_rate": 2.092101988131256e-09,
560
- "logits/chosen": -2.2692370414733887,
561
- "logits/rejected": -2.1535143852233887,
562
- "logps/chosen": -583.334716796875,
563
- "logps/rejected": -501.6742248535156,
564
- "loss": 0.0581,
565
- "rewards/accuracies": 0.84375,
566
- "rewards/chosen": -1.541758418083191,
567
- "rewards/margins": 2.3584675788879395,
568
- "rewards/rejected": -3.90022611618042,
569
  "step": 390
570
  },
571
  {
572
  "epoch": 0.99,
573
  "learning_rate": 2.327445937151673e-10,
574
- "logits/chosen": -2.2919979095458984,
575
- "logits/rejected": -2.1775660514831543,
576
- "logps/chosen": -602.81201171875,
577
- "logps/rejected": -531.6156616210938,
578
- "loss": 0.0554,
579
- "rewards/accuracies": 0.875,
580
- "rewards/chosen": -1.604543924331665,
581
- "rewards/margins": 2.4634101390838623,
582
- "rewards/rejected": -4.067954063415527,
583
  "step": 400
584
  },
585
  {
586
  "epoch": 1.0,
587
  "step": 405,
588
  "total_flos": 0.0,
589
- "train_loss": 0.1290441479947832,
590
- "train_runtime": 3205.4322,
591
- "train_samples_per_second": 16.189,
592
  "train_steps_per_second": 0.126
593
  }
594
  ],
 
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.2195121951219512e-08,
14
+ "logits/chosen": -2.8412017822265625,
15
+ "logits/rejected": -2.798035144805908,
16
+ "logps/chosen": -481.1488037109375,
17
+ "logps/rejected": -89.30835723876953,
18
+ "loss": 0.4176,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
 
25
  {
26
  "epoch": 0.02,
27
  "learning_rate": 1.219512195121951e-07,
28
+ "logits/chosen": -2.760530948638916,
29
+ "logits/rejected": -2.7219057083129883,
30
+ "logps/chosen": -418.49639892578125,
31
+ "logps/rejected": -116.74214935302734,
32
+ "loss": 0.4186,
33
+ "rewards/accuracies": 0.5486111044883728,
34
+ "rewards/chosen": 0.0008742791833356023,
35
+ "rewards/margins": 0.0011628220090642571,
36
+ "rewards/rejected": -0.0002885429421439767,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.05,
41
  "learning_rate": 2.439024390243902e-07,
42
+ "logits/chosen": -2.785912036895752,
43
+ "logits/rejected": -2.7400879859924316,
44
+ "logps/chosen": -395.93634033203125,
45
+ "logps/rejected": -109.28425598144531,
46
+ "loss": 0.4276,
47
+ "rewards/accuracies": 0.737500011920929,
48
+ "rewards/chosen": 0.014862718991935253,
49
+ "rewards/margins": 0.029387209564447403,
50
+ "rewards/rejected": -0.014524489641189575,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 3.6585365853658536e-07,
56
+ "logits/chosen": -2.70774507522583,
57
+ "logits/rejected": -2.6736245155334473,
58
+ "logps/chosen": -375.3507385253906,
59
+ "logps/rejected": -121.47686767578125,
60
+ "loss": 0.4137,
61
+ "rewards/accuracies": 0.7749999761581421,
62
+ "rewards/chosen": 0.06373313069343567,
63
+ "rewards/margins": 0.1794833242893219,
64
+ "rewards/rejected": -0.11575017869472504,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 4.878048780487804e-07,
70
+ "logits/chosen": -2.5397536754608154,
71
+ "logits/rejected": -2.5152153968811035,
72
+ "logps/chosen": -426.83306884765625,
73
+ "logps/rejected": -164.6002960205078,
74
+ "loss": 0.3837,
75
+ "rewards/accuracies": 0.831250011920929,
76
+ "rewards/chosen": 0.06933724880218506,
77
+ "rewards/margins": 0.513908326625824,
78
+ "rewards/rejected": -0.4445711076259613,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.12,
83
  "learning_rate": 4.992461696250783e-07,
84
+ "logits/chosen": -2.425987958908081,
85
+ "logits/rejected": -2.42197585105896,
86
+ "logps/chosen": -366.08343505859375,
87
+ "logps/rejected": -200.18582153320312,
88
+ "loss": 0.3126,
89
+ "rewards/accuracies": 0.7749999761581421,
90
+ "rewards/chosen": -0.1581335961818695,
91
+ "rewards/margins": 0.757983386516571,
92
+ "rewards/rejected": -0.9161170721054077,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 4.966461721767899e-07,
98
+ "logits/chosen": -2.3848328590393066,
99
+ "logits/rejected": -2.298985719680786,
100
+ "logps/chosen": -490.4847106933594,
101
+ "logps/rejected": -254.928466796875,
102
+ "loss": 0.2321,
103
+ "rewards/accuracies": 0.8187500238418579,
104
+ "rewards/chosen": -0.22150389850139618,
105
+ "rewards/margins": 1.2847968339920044,
106
+ "rewards/rejected": -1.506300926208496,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 4.922100518015975e-07,
112
+ "logits/chosen": -2.340442180633545,
113
+ "logits/rejected": -2.2820117473602295,
114
+ "logps/chosen": -490.87615966796875,
115
+ "logps/rejected": -321.3214111328125,
116
+ "loss": 0.1643,
117
+ "rewards/accuracies": 0.8062499761581421,
118
+ "rewards/chosen": -0.6543983817100525,
119
+ "rewards/margins": 1.5231101512908936,
120
+ "rewards/rejected": -2.17750883102417,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.2,
125
  "learning_rate": 4.859708325770919e-07,
126
+ "logits/chosen": -2.3740787506103516,
127
+ "logits/rejected": -2.3113033771514893,
128
+ "logps/chosen": -471.11651611328125,
129
+ "logps/rejected": -314.3768005371094,
130
+ "loss": 0.182,
131
+ "rewards/accuracies": 0.762499988079071,
132
+ "rewards/chosen": -0.38558533787727356,
133
+ "rewards/margins": 1.7214374542236328,
134
+ "rewards/rejected": -2.107023000717163,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.22,
139
  "learning_rate": 4.779749614980225e-07,
140
+ "logits/chosen": -2.2898011207580566,
141
+ "logits/rejected": -2.192032814025879,
142
+ "logps/chosen": -496.12384033203125,
143
+ "logps/rejected": -380.75909423828125,
144
+ "loss": 0.1376,
145
+ "rewards/accuracies": 0.793749988079071,
146
+ "rewards/chosen": -0.8930786848068237,
147
+ "rewards/margins": 1.819786787033081,
148
+ "rewards/rejected": -2.7128653526306152,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.25,
153
  "learning_rate": 4.682819627081427e-07,
154
+ "logits/chosen": -2.363102436065674,
155
+ "logits/rejected": -2.2812604904174805,
156
+ "logps/chosen": -489.04302978515625,
157
+ "logps/rejected": -326.9526062011719,
158
+ "loss": 0.1424,
159
+ "rewards/accuracies": 0.8187500238418579,
160
+ "rewards/chosen": -0.5512069463729858,
161
+ "rewards/margins": 1.6478255987167358,
162
+ "rewards/rejected": -2.1990325450897217,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 0.27,
167
  "learning_rate": 4.569639943810477e-07,
168
+ "logits/chosen": -2.3185834884643555,
169
+ "logits/rejected": -2.2058892250061035,
170
+ "logps/chosen": -502.36529541015625,
171
+ "logps/rejected": -393.0018615722656,
172
+ "loss": 0.1052,
173
+ "rewards/accuracies": 0.7875000238418579,
174
+ "rewards/chosen": -0.6747262477874756,
175
+ "rewards/margins": 2.091235399246216,
176
+ "rewards/rejected": -2.7659618854522705,
177
  "step": 110
178
  },
179
  {
180
  "epoch": 0.3,
181
  "learning_rate": 4.4410531154874543e-07,
182
+ "logits/chosen": -2.242546558380127,
183
+ "logits/rejected": -2.098257541656494,
184
+ "logps/chosen": -527.9982299804688,
185
+ "logps/rejected": -425.9187927246094,
186
+ "loss": 0.1028,
187
+ "rewards/accuracies": 0.8062499761581421,
188
+ "rewards/chosen": -1.0199496746063232,
189
+ "rewards/margins": 2.1254653930664062,
190
+ "rewards/rejected": -3.1454153060913086,
191
  "step": 120
192
  },
193
  {
194
  "epoch": 0.32,
195
  "learning_rate": 4.298016388768561e-07,
196
+ "logits/chosen": -2.2639729976654053,
197
+ "logits/rejected": -2.1410276889801025,
198
+ "logps/chosen": -474.557373046875,
199
+ "logps/rejected": -406.2401123046875,
200
+ "loss": 0.102,
201
+ "rewards/accuracies": 0.8812500238418579,
202
+ "rewards/chosen": -0.7631456851959229,
203
+ "rewards/margins": 2.1880364418029785,
204
+ "rewards/rejected": -2.9511821269989014,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 0.35,
209
  "learning_rate": 4.1415945805573005e-07,
210
+ "logits/chosen": -2.2353649139404297,
211
+ "logits/rejected": -2.103256940841675,
212
+ "logps/chosen": -494.5367126464844,
213
+ "logps/rejected": -413.3063049316406,
214
+ "loss": 0.109,
215
+ "rewards/accuracies": 0.8125,
216
+ "rewards/chosen": -0.6633724570274353,
217
+ "rewards/margins": 2.3635334968566895,
218
+ "rewards/rejected": -3.0269057750701904,
219
  "step": 140
220
  },
221
  {
222
  "epoch": 0.37,
223
  "learning_rate": 3.972952151123984e-07,
224
+ "logits/chosen": -2.243504762649536,
225
+ "logits/rejected": -2.1586227416992188,
226
+ "logps/chosen": -485.9297790527344,
227
+ "logps/rejected": -403.07904052734375,
228
+ "loss": 0.0988,
229
+ "rewards/accuracies": 0.8374999761581421,
230
+ "rewards/chosen": -0.8084269762039185,
231
+ "rewards/margins": 1.9387495517730713,
232
+ "rewards/rejected": -2.7471766471862793,
233
  "step": 150
234
  },
235
  {
236
  "epoch": 0.39,
237
  "learning_rate": 3.793344535444142e-07,
238
+ "logits/chosen": -2.2556967735290527,
239
+ "logits/rejected": -2.1410129070281982,
240
+ "logps/chosen": -517.8903198242188,
241
+ "logps/rejected": -427.27178955078125,
242
+ "loss": 0.1095,
243
+ "rewards/accuracies": 0.8062499761581421,
244
+ "rewards/chosen": -0.9526297450065613,
245
+ "rewards/margins": 2.0250723361968994,
246
+ "rewards/rejected": -2.9777019023895264,
247
  "step": 160
248
  },
249
  {
250
  "epoch": 0.42,
251
  "learning_rate": 3.604108797288461e-07,
252
+ "logits/chosen": -2.2546138763427734,
253
+ "logits/rejected": -2.162337064743042,
254
+ "logps/chosen": -484.6184997558594,
255
+ "logps/rejected": -358.60198974609375,
256
+ "loss": 0.1069,
257
+ "rewards/accuracies": 0.893750011920929,
258
+ "rewards/chosen": -0.6528670787811279,
259
+ "rewards/margins": 1.8891479969024658,
260
+ "rewards/rejected": -2.5420150756835938,
261
  "step": 170
262
  },
263
  {
264
  "epoch": 0.44,
265
  "learning_rate": 3.40665367563858e-07,
266
+ "logits/chosen": -2.199352264404297,
267
+ "logits/rejected": -2.0996298789978027,
268
+ "logps/chosen": -520.5743408203125,
269
+ "logps/rejected": -414.6077575683594,
270
+ "loss": 0.0923,
271
+ "rewards/accuracies": 0.8062499761581421,
272
+ "rewards/chosen": -0.9770752787590027,
273
+ "rewards/margins": 1.9050334692001343,
274
+ "rewards/rejected": -2.882108688354492,
275
  "step": 180
276
  },
277
  {
278
  "epoch": 0.47,
279
  "learning_rate": 3.202449097526798e-07,
280
+ "logits/chosen": -2.2049007415771484,
281
+ "logits/rejected": -2.0761523246765137,
282
+ "logps/chosen": -545.6790771484375,
283
+ "logps/rejected": -503.28326416015625,
284
+ "loss": 0.0757,
285
+ "rewards/accuracies": 0.84375,
286
+ "rewards/chosen": -1.1332416534423828,
287
+ "rewards/margins": 2.640087604522705,
288
+ "rewards/rejected": -3.773329257965088,
289
  "step": 190
290
  },
291
  {
292
  "epoch": 0.49,
293
  "learning_rate": 2.993015235369905e-07,
294
+ "logits/chosen": -2.251674175262451,
295
+ "logits/rejected": -2.138532876968384,
296
+ "logps/chosen": -552.5167236328125,
297
+ "logps/rejected": -459.761962890625,
298
+ "loss": 0.0843,
299
+ "rewards/accuracies": 0.862500011920929,
300
+ "rewards/chosen": -1.0592725276947021,
301
+ "rewards/margins": 2.3839969635009766,
302
+ "rewards/rejected": -3.443269729614258,
303
  "step": 200
304
  },
305
  {
306
  "epoch": 0.52,
307
  "learning_rate": 2.7799111902582693e-07,
308
+ "logits/chosen": -2.216552495956421,
309
+ "logits/rejected": -2.116063356399536,
310
+ "logps/chosen": -539.1038818359375,
311
+ "logps/rejected": -468.47869873046875,
312
+ "loss": 0.0889,
313
+ "rewards/accuracies": 0.8374999761581421,
314
+ "rewards/chosen": -1.1180602312088013,
315
+ "rewards/margins": 2.2188849449157715,
316
+ "rewards/rejected": -3.336945056915283,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 0.54,
321
  "learning_rate": 2.564723385445869e-07,
322
+ "logits/chosen": -2.182385206222534,
323
+ "logits/rejected": -2.0952038764953613,
324
+ "logps/chosen": -506.44970703125,
325
+ "logps/rejected": -433.21954345703125,
326
+ "loss": 0.0928,
327
+ "rewards/accuracies": 0.84375,
328
+ "rewards/chosen": -1.1999738216400146,
329
+ "rewards/margins": 1.8092005252838135,
330
+ "rewards/rejected": -3.00917387008667,
331
  "step": 220
332
  },
333
  {
334
  "epoch": 0.57,
335
  "learning_rate": 2.3490537564442845e-07,
336
+ "logits/chosen": -2.2019972801208496,
337
+ "logits/rejected": -2.1013846397399902,
338
+ "logps/chosen": -493.3079528808594,
339
+ "logps/rejected": -435.74884033203125,
340
+ "loss": 0.0853,
341
+ "rewards/accuracies": 0.862500011920929,
342
+ "rewards/chosen": -1.0892283916473389,
343
+ "rewards/margins": 2.1229333877563477,
344
+ "rewards/rejected": -3.2121620178222656,
345
  "step": 230
346
  },
347
  {
348
  "epoch": 0.59,
349
  "learning_rate": 2.1345078256378801e-07,
350
+ "logits/chosen": -2.2013192176818848,
351
+ "logits/rejected": -2.0689620971679688,
352
+ "logps/chosen": -503.4147033691406,
353
+ "logps/rejected": -448.7828674316406,
354
+ "loss": 0.08,
355
+ "rewards/accuracies": 0.8374999761581421,
356
+ "rewards/chosen": -1.28254234790802,
357
+ "rewards/margins": 2.0458292961120605,
358
+ "rewards/rejected": -3.32837176322937,
359
  "step": 240
360
  },
361
  {
362
  "epoch": 0.62,
363
  "learning_rate": 1.9226827501969865e-07,
364
+ "logits/chosen": -2.2620677947998047,
365
+ "logits/rejected": -2.1344215869903564,
366
+ "logps/chosen": -535.8228149414062,
367
+ "logps/rejected": -445.83349609375,
368
+ "loss": 0.0772,
369
+ "rewards/accuracies": 0.887499988079071,
370
+ "rewards/chosen": -0.9264475107192993,
371
+ "rewards/margins": 2.3875911235809326,
372
+ "rewards/rejected": -3.3140385150909424,
373
  "step": 250
374
  },
375
  {
376
  "epoch": 0.64,
377
  "learning_rate": 1.715155432264775e-07,
378
+ "logits/chosen": -2.2268154621124268,
379
+ "logits/rejected": -2.149883270263672,
380
+ "logps/chosen": -518.2362060546875,
381
+ "logps/rejected": -444.4808654785156,
382
+ "loss": 0.0781,
383
+ "rewards/accuracies": 0.8062499761581421,
384
+ "rewards/chosen": -1.0662428140640259,
385
+ "rewards/margins": 2.1827731132507324,
386
+ "rewards/rejected": -3.249016284942627,
387
  "step": 260
388
  },
389
  {
390
  "epoch": 0.67,
391
  "learning_rate": 1.51347077992983e-07,
392
+ "logits/chosen": -2.2635481357574463,
393
+ "logits/rejected": -2.1383774280548096,
394
+ "logps/chosen": -510.13751220703125,
395
+ "logps/rejected": -457.0772399902344,
396
+ "loss": 0.0781,
397
+ "rewards/accuracies": 0.84375,
398
+ "rewards/chosen": -1.0047271251678467,
399
+ "rewards/margins": 2.324492931365967,
400
+ "rewards/rejected": -3.3292198181152344,
401
  "step": 270
402
  },
403
  {
404
  "epoch": 0.69,
405
  "learning_rate": 1.3191302063739906e-07,
406
+ "logits/chosen": -2.2417304515838623,
407
+ "logits/rejected": -2.121445417404175,
408
+ "logps/chosen": -491.12139892578125,
409
+ "logps/rejected": -425.1968688964844,
410
+ "loss": 0.0805,
411
+ "rewards/accuracies": 0.90625,
412
+ "rewards/chosen": -0.8460060954093933,
413
+ "rewards/margins": 2.316087007522583,
414
+ "rewards/rejected": -3.162093162536621,
415
  "step": 280
416
  },
417
  {
418
  "epoch": 0.72,
419
  "learning_rate": 1.1335804528119475e-07,
420
+ "logits/chosen": -2.2970786094665527,
421
+ "logits/rejected": -2.17289662361145,
422
+ "logps/chosen": -507.46661376953125,
423
+ "logps/rejected": -449.6181640625,
424
+ "loss": 0.0812,
425
+ "rewards/accuracies": 0.8500000238418579,
426
+ "rewards/chosen": -1.0554336309432983,
427
+ "rewards/margins": 2.2787580490112305,
428
+ "rewards/rejected": -3.3341917991638184,
429
  "step": 290
430
  },
431
  {
432
  "epoch": 0.74,
433
  "learning_rate": 9.582028184286423e-08,
434
+ "logits/chosen": -2.2653486728668213,
435
+ "logits/rejected": -2.1368870735168457,
436
+ "logps/chosen": -563.6956787109375,
437
+ "logps/rejected": -506.1378479003906,
438
+ "loss": 0.0811,
439
+ "rewards/accuracies": 0.8500000238418579,
440
+ "rewards/chosen": -1.0197770595550537,
441
+ "rewards/margins": 2.7816028594970703,
442
+ "rewards/rejected": -3.801379680633545,
443
  "step": 300
444
  },
445
  {
446
  "epoch": 0.76,
447
  "learning_rate": 7.943028774907065e-08,
448
+ "logits/chosen": -2.212477445602417,
449
+ "logits/rejected": -2.083967685699463,
450
+ "logps/chosen": -543.1881103515625,
451
+ "logps/rejected": -489.3092346191406,
452
+ "loss": 0.072,
453
+ "rewards/accuracies": 0.8500000238418579,
454
+ "rewards/chosen": -1.234198808670044,
455
+ "rewards/margins": 2.4262847900390625,
456
+ "rewards/rejected": -3.6604835987091064,
457
  "step": 310
458
  },
459
  {
460
  "epoch": 0.79,
461
  "learning_rate": 6.431007601814637e-08,
462
+ "logits/chosen": -2.237189531326294,
463
+ "logits/rejected": -2.118699550628662,
464
+ "logps/chosen": -533.3051147460938,
465
+ "logps/rejected": -482.6719665527344,
466
+ "loss": 0.0764,
467
+ "rewards/accuracies": 0.793749988079071,
468
+ "rewards/chosen": -1.317209243774414,
469
+ "rewards/margins": 2.3056979179382324,
470
+ "rewards/rejected": -3.6229069232940674,
471
  "step": 320
472
  },
473
  {
474
  "epoch": 0.81,
475
  "learning_rate": 5.0572206951246e-08,
476
+ "logits/chosen": -2.262988567352295,
477
+ "logits/rejected": -2.143887996673584,
478
+ "logps/chosen": -538.3980712890625,
479
+ "logps/rejected": -492.5909118652344,
480
+ "loss": 0.0884,
481
+ "rewards/accuracies": 0.824999988079071,
482
+ "rewards/chosen": -1.2063168287277222,
483
+ "rewards/margins": 2.362534523010254,
484
+ "rewards/rejected": -3.5688509941101074,
485
  "step": 330
486
  },
487
  {
488
  "epoch": 0.84,
489
  "learning_rate": 3.831895019292897e-08,
490
+ "logits/chosen": -2.211160182952881,
491
+ "logits/rejected": -2.0655343532562256,
492
+ "logps/chosen": -495.141357421875,
493
+ "logps/rejected": -464.81646728515625,
494
+ "loss": 0.0744,
495
+ "rewards/accuracies": 0.84375,
496
+ "rewards/chosen": -1.0855491161346436,
497
+ "rewards/margins": 2.5346646308898926,
498
+ "rewards/rejected": -3.6202139854431152,
499
  "step": 340
500
  },
501
  {
502
  "epoch": 0.86,
503
  "learning_rate": 2.764152339909756e-08,
504
+ "logits/chosen": -2.273918867111206,
505
+ "logits/rejected": -2.128694534301758,
506
+ "logps/chosen": -528.7555541992188,
507
+ "logps/rejected": -469.3017578125,
508
+ "loss": 0.0642,
509
+ "rewards/accuracies": 0.8374999761581421,
510
+ "rewards/chosen": -1.1073048114776611,
511
+ "rewards/margins": 2.4064698219299316,
512
+ "rewards/rejected": -3.513774871826172,
513
  "step": 350
514
  },
515
  {
516
  "epoch": 0.89,
517
  "learning_rate": 1.861941317991664e-08,
518
+ "logits/chosen": -2.2403626441955566,
519
+ "logits/rejected": -2.1174542903900146,
520
+ "logps/chosen": -488.1507873535156,
521
+ "logps/rejected": -457.27423095703125,
522
+ "loss": 0.0756,
523
+ "rewards/accuracies": 0.8687499761581421,
524
+ "rewards/chosen": -1.1823718547821045,
525
+ "rewards/margins": 2.2865800857543945,
526
+ "rewards/rejected": -3.46895170211792,
527
  "step": 360
528
  },
529
  {
530
  "epoch": 0.91,
531
  "learning_rate": 1.13197833728636e-08,
532
+ "logits/chosen": -2.2277872562408447,
533
+ "logits/rejected": -2.0999319553375244,
534
+ "logps/chosen": -529.3685302734375,
535
+ "logps/rejected": -459.83551025390625,
536
+ "loss": 0.0729,
537
+ "rewards/accuracies": 0.856249988079071,
538
+ "rewards/chosen": -1.064345121383667,
539
+ "rewards/margins": 2.42720365524292,
540
+ "rewards/rejected": -3.491549015045166,
541
  "step": 370
542
  },
543
  {
544
  "epoch": 0.94,
545
  "learning_rate": 5.79697505093521e-09,
546
+ "logits/chosen": -2.161315679550171,
547
+ "logits/rejected": -2.051104784011841,
548
+ "logps/chosen": -518.0635986328125,
549
+ "logps/rejected": -463.9219665527344,
550
+ "loss": 0.0742,
551
+ "rewards/accuracies": 0.856249988079071,
552
+ "rewards/chosen": -1.2277439832687378,
553
+ "rewards/margins": 2.3196287155151367,
554
+ "rewards/rejected": -3.547372817993164,
555
  "step": 380
556
  },
557
  {
558
  "epoch": 0.96,
559
  "learning_rate": 2.092101988131256e-09,
560
+ "logits/chosen": -2.2838692665100098,
561
+ "logits/rejected": -2.1495554447174072,
562
+ "logps/chosen": -561.1207275390625,
563
+ "logps/rejected": -473.84234619140625,
564
+ "loss": 0.07,
565
+ "rewards/accuracies": 0.856249988079071,
566
+ "rewards/chosen": -1.088555932044983,
567
+ "rewards/margins": 2.4874186515808105,
568
+ "rewards/rejected": -3.575974702835083,
569
  "step": 390
570
  },
571
  {
572
  "epoch": 0.99,
573
  "learning_rate": 2.327445937151673e-10,
574
+ "logits/chosen": -2.214580774307251,
575
+ "logits/rejected": -2.0943350791931152,
576
+ "logps/chosen": -564.2920532226562,
577
+ "logps/rejected": -481.778564453125,
578
+ "loss": 0.0661,
579
+ "rewards/accuracies": 0.8374999761581421,
580
+ "rewards/chosen": -1.2679953575134277,
581
+ "rewards/margins": 2.343169689178467,
582
+ "rewards/rejected": -3.6111652851104736,
583
  "step": 400
584
  },
585
  {
586
  "epoch": 1.0,
587
  "step": 405,
588
  "total_flos": 0.0,
589
+ "train_loss": 0.13281457475674005,
590
+ "train_runtime": 3219.0486,
591
+ "train_samples_per_second": 16.121,
592
  "train_steps_per_second": 0.126
593
  }
594
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34facb59b2833ff9a65d1ea6ca0671f7143189081be77d079ad67a7343d5aa7d
3
  size 5944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0e23129935740782e931a75e4f8dcc9138d97706187a2a22ccc8a4a4f651ea
3
  size 5944