wzhouad commited on
Commit
67eaf45
1 Parent(s): 8847f9f

Model save

Browse files
README.md CHANGED
@@ -35,7 +35,7 @@ The following hyperparameters were used during training:
35
  - learning_rate: 3e-06
36
  - train_batch_size: 2
37
  - eval_batch_size: 8
38
- - seed: 5
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
  - gradient_accumulation_steps: 8
 
35
  - learning_rate: 3e-06
36
  - train_batch_size: 2
37
  - eval_batch_size: 8
38
+ - seed: 1
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
  - gradient_accumulation_steps: 8
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.36347593488827556,
4
- "train_runtime": 5281.0991,
5
- "train_samples": 45548,
6
- "train_samples_per_second": 8.625,
7
- "train_steps_per_second": 0.067
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.06463673111027891,
4
+ "train_runtime": 6408.5161,
5
+ "train_samples": 61134,
6
+ "train_samples_per_second": 9.539,
7
+ "train_steps_per_second": 0.074
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f6a53f0de4447eb60a67b94195b21f61ff96d26641e186ce68d47553b85e759
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27af38a706248a0b99ae38fc74e38845493c9f49e3ca192ac362b2cdeb19307c
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:992cfde5d1ddcabd7207fb645d58f7600e8efcbfea60a4bca5c54264d3e3f8d6
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca904bd84f3705e920e8b8be6855e11df01f0a3ed29009d2c7ddc39b5509121b
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:658f104b0a1fd1120eaff2ca804fcd58a1623759de1eaff60f24a3db80cfd2fc
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2f5a3559e562c55ca125abbea481359306cbfe893289ea7799a26f7a84812eb
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce53dea6eec3396aa58034e0ca8d59e1d64baa02ef8e6bb7cbd9619deeb20423
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a2ae3fba6aa6db34d28591f49acae193d0078360041a1d200071a70813f087
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.36347593488827556,
4
- "train_runtime": 5281.0991,
5
- "train_samples": 45548,
6
- "train_samples_per_second": 8.625,
7
- "train_steps_per_second": 0.067
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.06463673111027891,
4
+ "train_runtime": 6408.5161,
5
+ "train_samples": 61134,
6
+ "train_samples_per_second": 9.539,
7
+ "train_steps_per_second": 0.074
8
  }
trainer_state.json CHANGED
@@ -1,515 +1,683 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9975412715138743,
5
  "eval_steps": 10000,
6
- "global_step": 355,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.03,
13
- "learning_rate": 8.333333333333334e-07,
14
- "logits/chosen": -0.017936866730451584,
15
- "logits/rejected": 0.045307982712984085,
16
- "logps/chosen": -322.30169677734375,
17
- "logps/rejected": -218.5985107421875,
18
- "loss": 0.5197,
19
- "rewards/accuracies": 0.42500001192092896,
20
- "rewards/chosen": 0.0008429696899838746,
21
- "rewards/margins": 0.0020774812437593937,
22
- "rewards/rejected": -0.0012345117283985019,
23
  "step": 10
24
  },
25
  {
26
- "epoch": 0.06,
27
- "learning_rate": 1.6666666666666669e-06,
28
- "logits/chosen": -0.030718382447957993,
29
- "logits/rejected": -0.028245821595191956,
30
- "logps/chosen": -333.917236328125,
31
- "logps/rejected": -203.45745849609375,
32
- "loss": 0.5119,
33
- "rewards/accuracies": 0.668749988079071,
34
- "rewards/chosen": 0.004156398121267557,
35
- "rewards/margins": 0.04196573421359062,
36
- "rewards/rejected": -0.0378093346953392,
37
  "step": 20
38
  },
39
  {
40
- "epoch": 0.08,
41
- "learning_rate": 2.5e-06,
42
- "logits/chosen": -0.08321847021579742,
43
- "logits/rejected": -0.0037727858871221542,
44
- "logps/chosen": -432.2870178222656,
45
- "logps/rejected": -275.5068664550781,
46
- "loss": 0.5456,
47
- "rewards/accuracies": 0.550000011920929,
48
- "rewards/chosen": -0.017191946506500244,
49
- "rewards/margins": 0.21662700176239014,
50
- "rewards/rejected": -0.23381897807121277,
51
  "step": 30
52
  },
53
  {
54
- "epoch": 0.11,
55
- "learning_rate": 2.9988362934929793e-06,
56
- "logits/chosen": -0.037398938089609146,
57
- "logits/rejected": -0.007799749728292227,
58
- "logps/chosen": -359.85650634765625,
59
- "logps/rejected": -278.62567138671875,
60
- "loss": 0.5288,
61
- "rewards/accuracies": 0.65625,
62
- "rewards/chosen": 0.04756924882531166,
63
- "rewards/margins": 0.2058154046535492,
64
- "rewards/rejected": -0.15824612975120544,
65
  "step": 40
66
  },
67
  {
68
- "epoch": 0.14,
69
- "learning_rate": 2.985765322825759e-06,
70
- "logits/chosen": -0.0844019427895546,
71
- "logits/rejected": -0.06939023733139038,
72
- "logps/chosen": -276.42376708984375,
73
- "logps/rejected": -213.7895965576172,
74
- "loss": 0.4848,
75
- "rewards/accuracies": 0.59375,
76
- "rewards/chosen": -0.014759841375052929,
77
- "rewards/margins": 0.1379159539937973,
78
- "rewards/rejected": -0.1526757776737213,
79
  "step": 50
80
  },
81
  {
82
- "epoch": 0.17,
83
- "learning_rate": 2.9582958419982717e-06,
84
- "logits/chosen": 0.016000710427761078,
85
- "logits/rejected": 0.07897808402776718,
86
- "logps/chosen": -367.83868408203125,
87
- "logps/rejected": -227.6174774169922,
88
- "loss": 0.4654,
89
- "rewards/accuracies": 0.675000011920929,
90
- "rewards/chosen": -0.05412111431360245,
91
- "rewards/margins": 0.4200070798397064,
92
- "rewards/rejected": -0.4741281569004059,
93
  "step": 60
94
  },
95
  {
96
- "epoch": 0.2,
97
- "learning_rate": 2.916694056980408e-06,
98
- "logits/chosen": -0.0025311470963060856,
99
- "logits/rejected": 0.02233794890344143,
100
- "logps/chosen": -312.28948974609375,
101
- "logps/rejected": -265.64617919921875,
102
- "loss": 0.464,
103
- "rewards/accuracies": 0.6312500238418579,
104
- "rewards/chosen": -0.3601204752922058,
105
- "rewards/margins": 0.24295297265052795,
106
- "rewards/rejected": -0.6030734181404114,
107
  "step": 70
108
  },
109
  {
110
- "epoch": 0.22,
111
- "learning_rate": 2.8613631295064357e-06,
112
- "logits/chosen": -0.21361954510211945,
113
- "logits/rejected": -0.14258694648742676,
114
- "logps/chosen": -390.8519287109375,
115
- "logps/rejected": -232.2322540283203,
116
- "loss": 0.4208,
117
- "rewards/accuracies": 0.6875,
118
- "rewards/chosen": -0.1345187872648239,
119
- "rewards/margins": 0.3031948208808899,
120
- "rewards/rejected": -0.4377136826515198,
121
  "step": 80
122
  },
123
  {
124
- "epoch": 0.25,
125
- "learning_rate": 2.792839270045916e-06,
126
- "logits/chosen": -0.07295586168766022,
127
- "logits/rejected": -0.0871509537100792,
128
- "logps/chosen": -291.86248779296875,
129
- "logps/rejected": -255.58837890625,
130
- "loss": 0.4122,
131
- "rewards/accuracies": 0.5562499761581421,
132
- "rewards/chosen": -0.2528306543827057,
133
- "rewards/margins": 0.14771175384521484,
134
- "rewards/rejected": -0.4005424380302429,
135
  "step": 90
136
  },
137
  {
138
- "epoch": 0.28,
139
- "learning_rate": 2.711786541403051e-06,
140
- "logits/chosen": -0.09233228862285614,
141
- "logits/rejected": -0.02968590334057808,
142
- "logps/chosen": -370.9435729980469,
143
- "logps/rejected": -273.88336181640625,
144
- "loss": 0.4075,
145
- "rewards/accuracies": 0.6187499761581421,
146
- "rewards/chosen": -0.41826897859573364,
147
- "rewards/margins": 0.3016797602176666,
148
- "rewards/rejected": -0.7199487686157227,
149
  "step": 100
150
  },
151
  {
152
- "epoch": 0.31,
153
- "learning_rate": 2.6189904233026363e-06,
154
- "logits/chosen": -0.154756560921669,
155
- "logits/rejected": -0.07636446505784988,
156
- "logps/chosen": -376.0904235839844,
157
- "logps/rejected": -294.49786376953125,
158
- "loss": 0.3787,
159
- "rewards/accuracies": 0.612500011920929,
160
- "rewards/chosen": -0.4825851023197174,
161
- "rewards/margins": 0.3285244107246399,
162
- "rewards/rejected": -0.8111095428466797,
163
  "step": 110
164
  },
165
  {
166
- "epoch": 0.34,
167
- "learning_rate": 2.515350200328027e-06,
168
- "logits/chosen": -0.1974565088748932,
169
- "logits/rejected": -0.15066322684288025,
170
- "logps/chosen": -353.86907958984375,
171
- "logps/rejected": -284.64935302734375,
172
- "loss": 0.3675,
173
- "rewards/accuracies": 0.59375,
174
- "rewards/chosen": -0.5684340000152588,
175
- "rewards/margins": 0.19724634289741516,
176
- "rewards/rejected": -0.7656803131103516,
177
  "step": 120
178
  },
179
  {
180
- "epoch": 0.37,
181
- "learning_rate": 2.401870246979413e-06,
182
- "logits/chosen": -0.19759312272071838,
183
- "logits/rejected": -0.14790299534797668,
184
- "logps/chosen": -408.52532958984375,
185
- "logps/rejected": -301.2213439941406,
186
- "loss": 0.3345,
187
  "rewards/accuracies": 0.6625000238418579,
188
- "rewards/chosen": -0.6431132555007935,
189
- "rewards/margins": 0.26965436339378357,
190
- "rewards/rejected": -0.9127677083015442,
191
  "step": 130
192
  },
193
  {
194
- "epoch": 0.39,
195
- "learning_rate": 2.279650294308645e-06,
196
- "logits/chosen": -0.275656521320343,
197
- "logits/rejected": -0.1713864505290985,
198
- "logps/chosen": -383.6747131347656,
199
- "logps/rejected": -270.3328857421875,
200
- "loss": 0.3348,
201
  "rewards/accuracies": 0.637499988079071,
202
- "rewards/chosen": -0.6792303323745728,
203
- "rewards/margins": 0.3043554425239563,
204
- "rewards/rejected": -0.9835857152938843,
205
  "step": 140
206
  },
207
  {
208
- "epoch": 0.42,
209
- "learning_rate": 2.1498747724563957e-06,
210
- "logits/chosen": -0.27238455414772034,
211
- "logits/rejected": -0.22029218077659607,
212
- "logps/chosen": -391.3775329589844,
213
- "logps/rejected": -335.1509704589844,
214
- "loss": 0.3153,
215
- "rewards/accuracies": 0.5625,
216
- "rewards/chosen": -0.8400734663009644,
217
- "rewards/margins": 0.28538864850997925,
218
- "rewards/rejected": -1.1254620552062988,
219
  "step": 150
220
  },
221
  {
222
- "epoch": 0.45,
223
- "learning_rate": 2.0138013323728074e-06,
224
- "logits/chosen": -0.315167099237442,
225
- "logits/rejected": -0.23690147697925568,
226
- "logps/chosen": -406.4299011230469,
227
- "logps/rejected": -320.24237060546875,
228
- "loss": 0.2884,
229
- "rewards/accuracies": 0.6312500238418579,
230
- "rewards/chosen": -0.9414618611335754,
231
- "rewards/margins": 0.28279590606689453,
232
- "rewards/rejected": -1.2242577075958252,
233
  "step": 160
234
  },
235
  {
236
- "epoch": 0.48,
237
- "learning_rate": 1.8727486579573409e-06,
238
- "logits/chosen": -0.31794100999832153,
239
- "logits/rejected": -0.24908527731895447,
240
- "logps/chosen": -438.0152893066406,
241
- "logps/rejected": -302.74273681640625,
242
- "loss": 0.3332,
243
- "rewards/accuracies": 0.65625,
244
- "rewards/chosen": -0.6466701626777649,
245
- "rewards/margins": 0.3362303376197815,
246
- "rewards/rejected": -0.9829004406929016,
247
  "step": 170
248
  },
249
  {
250
- "epoch": 0.51,
251
- "learning_rate": 1.7280836867300083e-06,
252
- "logits/chosen": -0.3656935691833496,
253
- "logits/rejected": -0.32054948806762695,
254
- "logps/chosen": -409.75927734375,
255
- "logps/rejected": -302.7572021484375,
256
- "loss": 0.3209,
257
- "rewards/accuracies": 0.6499999761581421,
258
- "rewards/chosen": -0.7197253108024597,
259
- "rewards/margins": 0.2899569571018219,
260
- "rewards/rejected": -1.009682297706604,
261
  "step": 180
262
  },
263
  {
264
- "epoch": 0.53,
265
- "learning_rate": 1.5812083628781265e-06,
266
- "logits/chosen": -0.3520641624927521,
267
- "logits/rejected": -0.31913143396377563,
268
- "logps/chosen": -355.8263244628906,
269
- "logps/rejected": -325.4466247558594,
270
- "loss": 0.3024,
271
- "rewards/accuracies": 0.6000000238418579,
272
- "rewards/chosen": -1.009711503982544,
273
- "rewards/margins": 0.15852129459381104,
274
- "rewards/rejected": -1.1682326793670654,
275
  "step": 190
276
  },
277
  {
278
- "epoch": 0.56,
279
- "learning_rate": 1.433546051054432e-06,
280
- "logits/chosen": -0.334557443857193,
281
- "logits/rejected": -0.31981879472732544,
282
- "logps/chosen": -380.0057067871094,
283
- "logps/rejected": -353.52392578125,
284
- "loss": 0.304,
285
- "rewards/accuracies": 0.612500011920929,
286
- "rewards/chosen": -1.09440016746521,
287
- "rewards/margins": 0.2975843846797943,
288
- "rewards/rejected": -1.3919847011566162,
289
  "step": 200
290
  },
291
  {
292
- "epoch": 0.59,
293
- "learning_rate": 1.2865277425900725e-06,
294
- "logits/chosen": -0.37745895981788635,
295
- "logits/rejected": -0.3244924545288086,
296
- "logps/chosen": -395.09844970703125,
297
- "logps/rejected": -331.40655517578125,
298
- "loss": 0.2921,
299
- "rewards/accuracies": 0.6187499761581421,
300
- "rewards/chosen": -1.0221917629241943,
301
- "rewards/margins": 0.2329825460910797,
302
- "rewards/rejected": -1.2551742792129517,
303
  "step": 210
304
  },
305
  {
306
- "epoch": 0.62,
307
- "learning_rate": 1.141578187797663e-06,
308
- "logits/chosen": -0.2749294638633728,
309
- "logits/rejected": -0.2453482449054718,
310
- "logps/chosen": -422.58477783203125,
311
- "logps/rejected": -323.53900146484375,
312
- "loss": 0.3127,
313
- "rewards/accuracies": 0.65625,
314
- "rewards/chosen": -0.8388057947158813,
315
- "rewards/margins": 0.24924850463867188,
316
- "rewards/rejected": -1.0880544185638428,
317
  "step": 220
318
  },
319
  {
320
- "epoch": 0.65,
321
- "learning_rate": 1.0001020887558839e-06,
322
- "logits/chosen": -0.2927996516227722,
323
- "logits/rejected": -0.2675902247428894,
324
- "logps/chosen": -373.8236083984375,
325
- "logps/rejected": -345.1734924316406,
326
- "loss": 0.3375,
327
- "rewards/accuracies": 0.606249988079071,
328
- "rewards/chosen": -0.8683871030807495,
329
- "rewards/margins": 0.34467414021492004,
330
- "rewards/rejected": -1.2130613327026367,
331
  "step": 230
332
  },
333
  {
334
- "epoch": 0.67,
335
- "learning_rate": 8.634704863809502e-07,
336
- "logits/chosen": -0.26392003893852234,
337
- "logits/rejected": -0.26393693685531616,
338
- "logps/chosen": -417.762451171875,
339
- "logps/rejected": -343.877197265625,
340
- "loss": 0.3195,
341
- "rewards/accuracies": 0.6000000238418579,
342
- "rewards/chosen": -0.9277390241622925,
343
- "rewards/margins": 0.2722298502922058,
344
- "rewards/rejected": -1.199968934059143,
345
  "step": 240
346
  },
347
  {
348
- "epoch": 0.7,
349
- "learning_rate": 7.330074737074666e-07,
350
- "logits/chosen": -0.32263797521591187,
351
- "logits/rejected": -0.26007968187332153,
352
- "logps/chosen": -449.156005859375,
353
- "logps/rejected": -341.98846435546875,
354
- "loss": 0.3217,
355
- "rewards/accuracies": 0.581250011920929,
356
- "rewards/chosen": -0.9365653991699219,
357
- "rewards/margins": 0.19516155123710632,
358
- "rewards/rejected": -1.1317269802093506,
359
  "step": 250
360
  },
361
  {
362
- "epoch": 0.73,
363
- "learning_rate": 6.099773641398835e-07,
364
- "logits/chosen": -0.26862549781799316,
365
- "logits/rejected": -0.21074727177619934,
366
- "logps/chosen": -437.16864013671875,
367
- "logps/rejected": -361.11383056640625,
368
- "loss": 0.3233,
369
- "rewards/accuracies": 0.637499988079071,
370
- "rewards/chosen": -0.8692190051078796,
371
- "rewards/margins": 0.34968703985214233,
372
- "rewards/rejected": -1.218906044960022,
373
  "step": 260
374
  },
375
  {
376
- "epoch": 0.76,
377
- "learning_rate": 4.955724390266841e-07,
378
- "logits/chosen": -0.25061318278312683,
379
- "logits/rejected": -0.21778492629528046,
380
- "logps/chosen": -413.89892578125,
381
- "logps/rejected": -349.9348449707031,
382
- "loss": 0.3177,
383
- "rewards/accuracies": 0.612500011920929,
384
- "rewards/chosen": -0.9383090138435364,
385
- "rewards/margins": 0.2968607246875763,
386
- "rewards/rejected": -1.235169768333435,
387
  "step": 270
388
  },
389
  {
390
- "epoch": 0.79,
391
- "learning_rate": 3.9090139329520333e-07,
392
- "logits/chosen": -0.2774963974952698,
393
- "logits/rejected": -0.24690791964530945,
394
- "logps/chosen": -425.43524169921875,
395
- "logps/rejected": -339.97259521484375,
396
- "loss": 0.316,
397
- "rewards/accuracies": 0.675000011920929,
398
- "rewards/chosen": -0.9426881670951843,
399
- "rewards/margins": 0.4042733609676361,
400
- "rewards/rejected": -1.346961259841919,
401
  "step": 280
402
  },
403
  {
404
- "epoch": 0.81,
405
- "learning_rate": 2.9697859112011724e-07,
406
- "logits/chosen": -0.2760419547557831,
407
- "logits/rejected": -0.24498526751995087,
408
- "logps/chosen": -412.57318115234375,
409
- "logps/rejected": -335.27398681640625,
410
- "loss": 0.3082,
411
- "rewards/accuracies": 0.71875,
412
- "rewards/chosen": -0.7900550961494446,
413
- "rewards/margins": 0.4711545407772064,
414
- "rewards/rejected": -1.261209487915039,
415
  "step": 290
416
  },
417
  {
418
- "epoch": 0.84,
419
- "learning_rate": 2.1471423574861643e-07,
420
- "logits/chosen": -0.3094063997268677,
421
- "logits/rejected": -0.2514321208000183,
422
- "logps/chosen": -432.04644775390625,
423
- "logps/rejected": -345.2815856933594,
424
- "loss": 0.3048,
425
- "rewards/accuracies": 0.65625,
426
- "rewards/chosen": -0.8672820329666138,
427
- "rewards/margins": 0.400570809841156,
428
- "rewards/rejected": -1.2678529024124146,
429
  "step": 300
430
  },
431
  {
432
- "epoch": 0.87,
433
- "learning_rate": 1.449055487462102e-07,
434
- "logits/chosen": -0.3206644654273987,
435
- "logits/rejected": -0.27432483434677124,
436
- "logps/chosen": -436.33056640625,
437
- "logps/rejected": -320.2474670410156,
438
- "loss": 0.3097,
439
- "rewards/accuracies": 0.637499988079071,
440
- "rewards/chosen": -0.9078308343887329,
441
- "rewards/margins": 0.35346266627311707,
442
- "rewards/rejected": -1.2612934112548828,
443
  "step": 310
444
  },
445
  {
446
- "epoch": 0.9,
447
- "learning_rate": 8.822904414485194e-08,
448
- "logits/chosen": -0.2824671268463135,
449
- "logits/rejected": -0.23151321709156036,
450
- "logps/chosen": -428.50921630859375,
451
- "logps/rejected": -328.49884033203125,
452
- "loss": 0.3103,
453
- "rewards/accuracies": 0.6312500238418579,
454
- "rewards/chosen": -0.8764031529426575,
455
- "rewards/margins": 0.3287006914615631,
456
- "rewards/rejected": -1.205103874206543,
457
  "step": 320
458
  },
459
  {
460
- "epoch": 0.93,
461
- "learning_rate": 4.523397236438398e-08,
462
- "logits/chosen": -0.24987097084522247,
463
- "logits/rejected": -0.22064971923828125,
464
- "logps/chosen": -393.6728515625,
465
- "logps/rejected": -334.59051513671875,
466
- "loss": 0.3136,
467
- "rewards/accuracies": 0.625,
468
- "rewards/chosen": -0.8506187200546265,
469
- "rewards/margins": 0.35463377833366394,
470
- "rewards/rejected": -1.2052525281906128,
471
  "step": 330
472
  },
473
  {
474
- "epoch": 0.96,
475
- "learning_rate": 1.6336997442095825e-08,
476
- "logits/chosen": -0.2847253680229187,
477
- "logits/rejected": -0.21411709487438202,
478
- "logps/chosen": -375.3291015625,
479
- "logps/rejected": -291.4599914550781,
480
- "loss": 0.3118,
481
- "rewards/accuracies": 0.550000011920929,
482
- "rewards/chosen": -0.9054251909255981,
483
- "rewards/margins": 0.24709534645080566,
484
- "rewards/rejected": -1.1525206565856934,
485
  "step": 340
486
  },
487
  {
488
- "epoch": 0.98,
489
- "learning_rate": 1.8181591531977737e-09,
490
- "logits/chosen": -0.28506776690483093,
491
- "logits/rejected": -0.27561822533607483,
492
- "logps/chosen": -402.9325866699219,
493
- "logps/rejected": -352.8019104003906,
494
- "loss": 0.3253,
495
- "rewards/accuracies": 0.6187499761581421,
496
- "rewards/chosen": -0.9050580263137817,
497
- "rewards/margins": 0.32159894704818726,
498
- "rewards/rejected": -1.2266569137573242,
499
  "step": 350
500
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  {
502
  "epoch": 1.0,
503
- "step": 355,
504
  "total_flos": 0.0,
505
- "train_loss": 0.36347593488827556,
506
- "train_runtime": 5281.0991,
507
- "train_samples_per_second": 8.625,
508
- "train_steps_per_second": 0.067
509
  }
510
  ],
511
  "logging_steps": 10,
512
- "max_steps": 355,
513
  "num_train_epochs": 1,
514
  "save_steps": 10000,
515
  "total_flos": 0.0,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.998691442030882,
5
  "eval_steps": 10000,
6
+ "global_step": 477,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
+ "learning_rate": 6.25e-07,
14
+ "logits/chosen": 0.17706245183944702,
15
+ "logits/rejected": 0.2540971636772156,
16
+ "logps/chosen": -354.3509826660156,
17
+ "logps/rejected": -305.29473876953125,
18
+ "loss": 0.1819,
19
+ "rewards/accuracies": 0.4124999940395355,
20
+ "rewards/chosen": 0.0004928814596496522,
21
+ "rewards/margins": 0.001260685734450817,
22
+ "rewards/rejected": -0.0007678042748011649,
23
  "step": 10
24
  },
25
  {
26
+ "epoch": 0.04,
27
+ "learning_rate": 1.25e-06,
28
+ "logits/chosen": 0.08185596764087677,
29
+ "logits/rejected": 0.20913369953632355,
30
+ "logps/chosen": -316.39178466796875,
31
+ "logps/rejected": -277.11273193359375,
32
+ "loss": 0.1822,
33
+ "rewards/accuracies": 0.706250011920929,
34
+ "rewards/chosen": 0.0034345737658441067,
35
+ "rewards/margins": 0.014207230880856514,
36
+ "rewards/rejected": -0.010772655718028545,
37
  "step": 20
38
  },
39
  {
40
+ "epoch": 0.06,
41
+ "learning_rate": 1.875e-06,
42
+ "logits/chosen": 0.2634967267513275,
43
+ "logits/rejected": 0.3354651629924774,
44
+ "logps/chosen": -301.5518493652344,
45
+ "logps/rejected": -310.61309814453125,
46
+ "loss": 0.1772,
47
+ "rewards/accuracies": 0.6187499761581421,
48
+ "rewards/chosen": -0.06584969907999039,
49
+ "rewards/margins": 0.057301245629787445,
50
+ "rewards/rejected": -0.12315094470977783,
51
  "step": 30
52
  },
53
  {
54
+ "epoch": 0.08,
55
+ "learning_rate": 2.5e-06,
56
+ "logits/chosen": 0.27594679594039917,
57
+ "logits/rejected": 0.38354000449180603,
58
+ "logps/chosen": -374.3425598144531,
59
+ "logps/rejected": -361.6021423339844,
60
+ "loss": 0.1397,
61
+ "rewards/accuracies": 0.6625000238418579,
62
+ "rewards/chosen": -0.2677033543586731,
63
+ "rewards/margins": 0.14364728331565857,
64
+ "rewards/rejected": -0.41135063767433167,
65
  "step": 40
66
  },
67
  {
68
+ "epoch": 0.1,
69
+ "learning_rate": 2.999839121261416e-06,
70
+ "logits/chosen": 0.3216066062450409,
71
+ "logits/rejected": 0.4195839762687683,
72
+ "logps/chosen": -371.8332824707031,
73
+ "logps/rejected": -384.00860595703125,
74
+ "loss": 0.0862,
75
+ "rewards/accuracies": 0.699999988079071,
76
+ "rewards/chosen": -0.6128066778182983,
77
+ "rewards/margins": 0.39045700430870056,
78
+ "rewards/rejected": -1.0032637119293213,
79
  "step": 50
80
  },
81
  {
82
+ "epoch": 0.13,
83
+ "learning_rate": 2.994211988057582e-06,
84
+ "logits/chosen": 0.16005071997642517,
85
+ "logits/rejected": 0.29673656821250916,
86
+ "logps/chosen": -350.0096740722656,
87
+ "logps/rejected": -372.6922912597656,
88
+ "loss": 0.0896,
89
+ "rewards/accuracies": 0.65625,
90
+ "rewards/chosen": -0.5713089108467102,
91
+ "rewards/margins": 0.4011602997779846,
92
+ "rewards/rejected": -0.9724692106246948,
93
  "step": 60
94
  },
95
  {
96
+ "epoch": 0.15,
97
+ "learning_rate": 2.9805753939568693e-06,
98
+ "logits/chosen": 0.0862194150686264,
99
+ "logits/rejected": 0.18484732508659363,
100
+ "logps/chosen": -361.5823974609375,
101
+ "logps/rejected": -393.074462890625,
102
+ "loss": 0.1054,
103
+ "rewards/accuracies": 0.637499988079071,
104
+ "rewards/chosen": -0.3185870051383972,
105
+ "rewards/margins": 0.42267927527427673,
106
+ "rewards/rejected": -0.7412663102149963,
107
  "step": 70
108
  },
109
  {
110
+ "epoch": 0.17,
111
+ "learning_rate": 2.959002435526626e-06,
112
+ "logits/chosen": 0.14816270768642426,
113
+ "logits/rejected": 0.2659767270088196,
114
+ "logps/chosen": -378.1669616699219,
115
+ "logps/rejected": -401.71990966796875,
116
+ "loss": 0.0828,
117
+ "rewards/accuracies": 0.7749999761581421,
118
+ "rewards/chosen": -0.5950332880020142,
119
+ "rewards/margins": 0.5514911413192749,
120
+ "rewards/rejected": -1.1465245485305786,
121
  "step": 80
122
  },
123
  {
124
+ "epoch": 0.19,
125
+ "learning_rate": 2.929608750821129e-06,
126
+ "logits/chosen": 0.025451337918639183,
127
+ "logits/rejected": 0.19968536496162415,
128
+ "logps/chosen": -441.95806884765625,
129
+ "logps/rejected": -415.0679626464844,
130
+ "loss": 0.0542,
131
+ "rewards/accuracies": 0.637499988079071,
132
+ "rewards/chosen": -1.0501652956008911,
133
+ "rewards/margins": 0.4297688901424408,
134
+ "rewards/rejected": -1.4799340963363647,
135
  "step": 90
136
  },
137
  {
138
+ "epoch": 0.21,
139
+ "learning_rate": 2.892551899524109e-06,
140
+ "logits/chosen": 0.12514810264110565,
141
+ "logits/rejected": 0.3461776673793793,
142
+ "logps/chosen": -407.69647216796875,
143
+ "logps/rejected": -429.8515625,
144
+ "loss": 0.0753,
145
+ "rewards/accuracies": 0.6812499761581421,
146
+ "rewards/chosen": -0.7313500046730042,
147
+ "rewards/margins": 0.6433447599411011,
148
+ "rewards/rejected": -1.37469482421875,
149
  "step": 100
150
  },
151
  {
152
+ "epoch": 0.23,
153
+ "learning_rate": 2.848030518377739e-06,
154
+ "logits/chosen": 0.14924690127372742,
155
+ "logits/rejected": 0.21244895458221436,
156
+ "logps/chosen": -398.8333740234375,
157
+ "logps/rejected": -449.89080810546875,
158
+ "loss": 0.0684,
159
+ "rewards/accuracies": 0.6812499761581421,
160
+ "rewards/chosen": -0.8621395230293274,
161
+ "rewards/margins": 0.6422672271728516,
162
+ "rewards/rejected": -1.5044066905975342,
163
  "step": 110
164
  },
165
  {
166
+ "epoch": 0.25,
167
+ "learning_rate": 2.7962832564252724e-06,
168
+ "logits/chosen": 0.06671784818172455,
169
+ "logits/rejected": 0.14493227005004883,
170
+ "logps/chosen": -412.3326110839844,
171
+ "logps/rejected": -474.692138671875,
172
+ "loss": 0.0523,
173
+ "rewards/accuracies": 0.6937500238418579,
174
+ "rewards/chosen": -0.9177900552749634,
175
+ "rewards/margins": 0.6171673536300659,
176
+ "rewards/rejected": -1.5349572896957397,
177
  "step": 120
178
  },
179
  {
180
+ "epoch": 0.27,
181
+ "learning_rate": 2.7375874957747644e-06,
182
+ "logits/chosen": 0.037053029984235764,
183
+ "logits/rejected": 0.2220267802476883,
184
+ "logps/chosen": -437.1244201660156,
185
+ "logps/rejected": -436.5159606933594,
186
+ "loss": 0.0564,
187
  "rewards/accuracies": 0.6625000238418579,
188
+ "rewards/chosen": -0.9862833023071289,
189
+ "rewards/margins": 0.5303726196289062,
190
+ "rewards/rejected": -1.5166559219360352,
191
  "step": 130
192
  },
193
  {
194
+ "epoch": 0.29,
195
+ "learning_rate": 2.672257864741005e-06,
196
+ "logits/chosen": 0.05896978825330734,
197
+ "logits/rejected": 0.23563237488269806,
198
+ "logps/chosen": -409.24481201171875,
199
+ "logps/rejected": -379.90924072265625,
200
+ "loss": 0.0706,
201
  "rewards/accuracies": 0.637499988079071,
202
+ "rewards/chosen": -0.7720142602920532,
203
+ "rewards/margins": 0.32800909876823425,
204
+ "rewards/rejected": -1.1000233888626099,
205
  "step": 140
206
  },
207
  {
208
+ "epoch": 0.31,
209
+ "learning_rate": 2.600644551335706e-06,
210
+ "logits/chosen": 0.10780592262744904,
211
+ "logits/rejected": 0.15396630764007568,
212
+ "logps/chosen": -420.455078125,
213
+ "logps/rejected": -461.3589782714844,
214
+ "loss": 0.0715,
215
+ "rewards/accuracies": 0.699999988079071,
216
+ "rewards/chosen": -0.8515451550483704,
217
+ "rewards/margins": 0.6125485301017761,
218
+ "rewards/rejected": -1.464093804359436,
219
  "step": 150
220
  },
221
  {
222
+ "epoch": 0.33,
223
+ "learning_rate": 2.5231314261461732e-06,
224
+ "logits/chosen": 0.001874491572380066,
225
+ "logits/rejected": 0.0530397966504097,
226
+ "logps/chosen": -404.02886962890625,
227
+ "logps/rejected": -476.5221252441406,
228
+ "loss": 0.0509,
229
+ "rewards/accuracies": 0.731249988079071,
230
+ "rewards/chosen": -1.0196731090545654,
231
+ "rewards/margins": 0.9385590553283691,
232
+ "rewards/rejected": -1.9582321643829346,
233
  "step": 160
234
  },
235
  {
236
+ "epoch": 0.36,
237
+ "learning_rate": 2.440133984664454e-06,
238
+ "logits/chosen": 0.03371699899435043,
239
+ "logits/rejected": 0.11997250467538834,
240
+ "logps/chosen": -449.0216369628906,
241
+ "logps/rejected": -491.6813049316406,
242
+ "loss": 0.0569,
243
+ "rewards/accuracies": 0.6875,
244
+ "rewards/chosen": -1.126532793045044,
245
+ "rewards/margins": 0.7117874026298523,
246
+ "rewards/rejected": -1.8383201360702515,
247
  "step": 170
248
  },
249
  {
250
+ "epoch": 0.38,
251
+ "learning_rate": 2.3520971200967337e-06,
252
+ "logits/chosen": -0.039311788976192474,
253
+ "logits/rejected": 0.11384377628564835,
254
+ "logps/chosen": -429.73907470703125,
255
+ "logps/rejected": -488.38665771484375,
256
+ "loss": 0.0497,
257
+ "rewards/accuracies": 0.7562500238418579,
258
+ "rewards/chosen": -1.037070870399475,
259
+ "rewards/margins": 0.8480435609817505,
260
+ "rewards/rejected": -1.8851144313812256,
261
  "step": 180
262
  },
263
  {
264
+ "epoch": 0.4,
265
+ "learning_rate": 2.2594927385914546e-06,
266
+ "logits/chosen": -0.043736983090639114,
267
+ "logits/rejected": 0.0784933939576149,
268
+ "logps/chosen": -478.27044677734375,
269
+ "logps/rejected": -528.6622314453125,
270
+ "loss": 0.0427,
271
+ "rewards/accuracies": 0.6625000238418579,
272
+ "rewards/chosen": -1.4361096620559692,
273
+ "rewards/margins": 0.7232626676559448,
274
+ "rewards/rejected": -2.159372329711914,
275
  "step": 190
276
  },
277
  {
278
+ "epoch": 0.42,
279
+ "learning_rate": 2.1628172296692954e-06,
280
+ "logits/chosen": -0.07505004107952118,
281
+ "logits/rejected": 0.08389478921890259,
282
+ "logps/chosen": -441.46142578125,
283
+ "logps/rejected": -471.33380126953125,
284
+ "loss": 0.0507,
285
+ "rewards/accuracies": 0.6875,
286
+ "rewards/chosen": -1.009716510772705,
287
+ "rewards/margins": 0.762354850769043,
288
+ "rewards/rejected": -1.7720712423324585,
289
  "step": 200
290
  },
291
  {
292
+ "epoch": 0.44,
293
+ "learning_rate": 2.062588805414343e-06,
294
+ "logits/chosen": 0.04285336285829544,
295
+ "logits/rejected": 0.2198754996061325,
296
+ "logps/chosen": -396.66552734375,
297
+ "logps/rejected": -416.57763671875,
298
+ "loss": 0.0602,
299
+ "rewards/accuracies": 0.71875,
300
+ "rewards/chosen": -0.8250829577445984,
301
+ "rewards/margins": 0.6419562101364136,
302
+ "rewards/rejected": -1.4670391082763672,
303
  "step": 210
304
  },
305
  {
306
+ "epoch": 0.46,
307
+ "learning_rate": 1.9593447226892386e-06,
308
+ "logits/chosen": -0.06010212376713753,
309
+ "logits/rejected": 0.09760335832834244,
310
+ "logps/chosen": -397.46624755859375,
311
+ "logps/rejected": -395.8749084472656,
312
+ "loss": 0.0661,
313
+ "rewards/accuracies": 0.6312500238418579,
314
+ "rewards/chosen": -0.8078718185424805,
315
+ "rewards/margins": 0.5141724944114685,
316
+ "rewards/rejected": -1.3220441341400146,
317
  "step": 220
318
  },
319
  {
320
+ "epoch": 0.48,
321
+ "learning_rate": 1.853638403264141e-06,
322
+ "logits/chosen": -0.03770698606967926,
323
+ "logits/rejected": 0.013910258188843727,
324
+ "logps/chosen": -444.28985595703125,
325
+ "logps/rejected": -494.4593811035156,
326
+ "loss": 0.0507,
327
+ "rewards/accuracies": 0.668749988079071,
328
+ "rewards/chosen": -1.182284951210022,
329
+ "rewards/margins": 0.9311016798019409,
330
+ "rewards/rejected": -2.113386631011963,
331
  "step": 230
332
  },
333
  {
334
+ "epoch": 0.5,
335
+ "learning_rate": 1.7460364672965328e-06,
336
+ "logits/chosen": -0.16847534477710724,
337
+ "logits/rejected": -0.11212627589702606,
338
+ "logps/chosen": -446.85369873046875,
339
+ "logps/rejected": -486.97698974609375,
340
+ "loss": 0.051,
341
+ "rewards/accuracies": 0.6312500238418579,
342
+ "rewards/chosen": -1.3490196466445923,
343
+ "rewards/margins": 0.5016757845878601,
344
+ "rewards/rejected": -1.8506953716278076,
345
  "step": 240
346
  },
347
  {
348
+ "epoch": 0.52,
349
+ "learning_rate": 1.637115696063402e-06,
350
+ "logits/chosen": -0.20698556303977966,
351
+ "logits/rejected": -0.1366804540157318,
352
+ "logps/chosen": -411.64892578125,
353
+ "logps/rejected": -442.436279296875,
354
+ "loss": 0.0518,
355
+ "rewards/accuracies": 0.6937500238418579,
356
+ "rewards/chosen": -0.9960080981254578,
357
+ "rewards/margins": 0.6079148650169373,
358
+ "rewards/rejected": -1.6039228439331055,
359
  "step": 250
360
  },
361
  {
362
+ "epoch": 0.54,
363
+ "learning_rate": 1.5274599402265162e-06,
364
+ "logits/chosen": -0.30626335740089417,
365
+ "logits/rejected": -0.21004387736320496,
366
+ "logps/chosen": -493.10693359375,
367
+ "logps/rejected": -501.22113037109375,
368
+ "loss": 0.0491,
369
+ "rewards/accuracies": 0.65625,
370
+ "rewards/chosen": -1.3846315145492554,
371
+ "rewards/margins": 0.6269583106040955,
372
+ "rewards/rejected": -2.011589765548706,
373
  "step": 260
374
  },
375
  {
376
+ "epoch": 0.57,
377
+ "learning_rate": 1.4176569902035088e-06,
378
+ "logits/chosen": -0.21837463974952698,
379
+ "logits/rejected": -0.18761083483695984,
380
+ "logps/chosen": -464.00579833984375,
381
+ "logps/rejected": -516.2754516601562,
382
+ "loss": 0.0453,
383
+ "rewards/accuracies": 0.6812499761581421,
384
+ "rewards/chosen": -1.348719596862793,
385
+ "rewards/margins": 0.7095474600791931,
386
+ "rewards/rejected": -2.058267116546631,
387
  "step": 270
388
  },
389
  {
390
+ "epoch": 0.59,
391
+ "learning_rate": 1.308295425420593e-06,
392
+ "logits/chosen": -0.20622439682483673,
393
+ "logits/rejected": -0.22035178542137146,
394
+ "logps/chosen": -418.81097412109375,
395
+ "logps/rejected": -477.7355041503906,
396
+ "loss": 0.0522,
397
+ "rewards/accuracies": 0.6875,
398
+ "rewards/chosen": -1.2232805490493774,
399
+ "rewards/margins": 0.6434040665626526,
400
+ "rewards/rejected": -1.8666845560073853,
401
  "step": 280
402
  },
403
  {
404
+ "epoch": 0.61,
405
+ "learning_rate": 1.1999614593359337e-06,
406
+ "logits/chosen": -0.18864139914512634,
407
+ "logits/rejected": -0.12041006982326508,
408
+ "logps/chosen": -460.80059814453125,
409
+ "logps/rejected": -523.751953125,
410
+ "loss": 0.0502,
411
+ "rewards/accuracies": 0.768750011920929,
412
+ "rewards/chosen": -1.1436948776245117,
413
+ "rewards/margins": 0.8658909797668457,
414
+ "rewards/rejected": -2.0095858573913574,
415
  "step": 290
416
  },
417
  {
418
+ "epoch": 0.63,
419
+ "learning_rate": 1.0932357971453745e-06,
420
+ "logits/chosen": -0.142107754945755,
421
+ "logits/rejected": -0.11606737226247787,
422
+ "logps/chosen": -407.21630859375,
423
+ "logps/rejected": -519.4342041015625,
424
+ "loss": 0.0422,
425
+ "rewards/accuracies": 0.731249988079071,
426
+ "rewards/chosen": -1.1546132564544678,
427
+ "rewards/margins": 0.8063042759895325,
428
+ "rewards/rejected": -1.9609174728393555,
429
  "step": 300
430
  },
431
  {
432
+ "epoch": 0.65,
433
+ "learning_rate": 9.886905230142433e-07,
434
+ "logits/chosen": -0.10813410580158234,
435
+ "logits/rejected": -0.10192851722240448,
436
+ "logps/chosen": -414.4422912597656,
437
+ "logps/rejected": -515.26416015625,
438
+ "loss": 0.0439,
439
+ "rewards/accuracies": 0.7250000238418579,
440
+ "rewards/chosen": -1.1871048212051392,
441
+ "rewards/margins": 0.8484551310539246,
442
+ "rewards/rejected": -2.035560131072998,
443
  "step": 310
444
  },
445
  {
446
+ "epoch": 0.67,
447
+ "learning_rate": 8.868860335206678e-07,
448
+ "logits/chosen": -0.10642366111278534,
449
+ "logits/rejected": 0.012383558787405491,
450
+ "logps/chosen": -418.55712890625,
451
+ "logps/rejected": -490.4601135253906,
452
+ "loss": 0.0478,
453
+ "rewards/accuracies": 0.7250000238418579,
454
+ "rewards/chosen": -1.0292608737945557,
455
+ "rewards/margins": 0.9733268618583679,
456
+ "rewards/rejected": -2.0025877952575684,
457
  "step": 320
458
  },
459
  {
460
+ "epoch": 0.69,
461
+ "learning_rate": 7.883680337481599e-07,
462
+ "logits/chosen": -0.05966154858469963,
463
+ "logits/rejected": 0.014715162105858326,
464
+ "logps/chosen": -396.83026123046875,
465
+ "logps/rejected": -494.3729553222656,
466
+ "loss": 0.047,
467
+ "rewards/accuracies": 0.71875,
468
+ "rewards/chosen": -0.9806185960769653,
469
+ "rewards/margins": 0.8556007146835327,
470
+ "rewards/rejected": -1.8362191915512085,
471
  "step": 330
472
  },
473
  {
474
+ "epoch": 0.71,
475
+ "learning_rate": 6.936646121293654e-07,
476
+ "logits/chosen": -0.10212980210781097,
477
+ "logits/rejected": -0.02083268202841282,
478
+ "logps/chosen": -382.59844970703125,
479
+ "logps/rejected": -440.49908447265625,
480
+ "loss": 0.0489,
481
+ "rewards/accuracies": 0.7562500238418579,
482
+ "rewards/chosen": -0.959019660949707,
483
+ "rewards/margins": 0.8376408815383911,
484
+ "rewards/rejected": -1.7966604232788086,
485
  "step": 340
486
  },
487
  {
488
+ "epoch": 0.73,
489
+ "learning_rate": 6.032834097207889e-07,
490
+ "logits/chosen": -0.14827466011047363,
491
+ "logits/rejected": -0.1494934856891632,
492
+ "logps/chosen": -398.29461669921875,
493
+ "logps/rejected": -487.94012451171875,
494
+ "loss": 0.0474,
495
+ "rewards/accuracies": 0.6937500238418579,
496
+ "rewards/chosen": -0.9679247736930847,
497
+ "rewards/margins": 0.805729866027832,
498
+ "rewards/rejected": -1.7736546993255615,
499
  "step": 350
500
  },
501
+ {
502
+ "epoch": 0.75,
503
+ "learning_rate": 5.177088990820725e-07,
504
+ "logits/chosen": -0.19919797778129578,
505
+ "logits/rejected": -0.08585543930530548,
506
+ "logps/chosen": -423.6690979003906,
507
+ "logps/rejected": -448.3824768066406,
508
+ "loss": 0.0483,
509
+ "rewards/accuracies": 0.7562500238418579,
510
+ "rewards/chosen": -0.9551182985305786,
511
+ "rewards/margins": 0.7068794965744019,
512
+ "rewards/rejected": -1.6619977951049805,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.77,
517
+ "learning_rate": 4.3739978734594494e-07,
518
+ "logits/chosen": -0.19064149260520935,
519
+ "logits/rejected": -0.08522866666316986,
520
+ "logps/chosen": -448.2352600097656,
521
+ "logps/rejected": -489.485107421875,
522
+ "loss": 0.0495,
523
+ "rewards/accuracies": 0.706250011920929,
524
+ "rewards/chosen": -0.9980375170707703,
525
+ "rewards/margins": 0.7906314134597778,
526
+ "rewards/rejected": -1.7886688709259033,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.8,
531
+ "learning_rate": 3.627865573992087e-07,
532
+ "logits/chosen": -0.13051238656044006,
533
+ "logits/rejected": -0.06193440407514572,
534
+ "logps/chosen": -398.67425537109375,
535
+ "logps/rejected": -444.31951904296875,
536
+ "loss": 0.0518,
537
+ "rewards/accuracies": 0.7250000238418579,
538
+ "rewards/chosen": -1.0560102462768555,
539
+ "rewards/margins": 0.6653419137001038,
540
+ "rewards/rejected": -1.721352219581604,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.82,
545
+ "learning_rate": 2.9426916035484166e-07,
546
+ "logits/chosen": -0.24809321761131287,
547
+ "logits/rejected": -0.13946378231048584,
548
+ "logps/chosen": -439.02069091796875,
549
+ "logps/rejected": -481.7464904785156,
550
+ "loss": 0.0461,
551
+ "rewards/accuracies": 0.737500011920929,
552
+ "rewards/chosen": -0.9732930064201355,
553
+ "rewards/margins": 0.8343151807785034,
554
+ "rewards/rejected": -1.8076083660125732,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.84,
559
+ "learning_rate": 2.322148716843081e-07,
560
+ "logits/chosen": -0.17726832628250122,
561
+ "logits/rejected": -0.10749037563800812,
562
+ "logps/chosen": -414.79779052734375,
563
+ "logps/rejected": -467.64593505859375,
564
+ "loss": 0.0514,
565
+ "rewards/accuracies": 0.75,
566
+ "rewards/chosen": -0.9779269099235535,
567
+ "rewards/margins": 0.7506555318832397,
568
+ "rewards/rejected": -1.7285826206207275,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.86,
573
+ "learning_rate": 1.7695632250191002e-07,
574
+ "logits/chosen": -0.2721072733402252,
575
+ "logits/rejected": -0.2248070240020752,
576
+ "logps/chosen": -403.7864685058594,
577
+ "logps/rejected": -474.9762268066406,
578
+ "loss": 0.0427,
579
+ "rewards/accuracies": 0.6499999761581421,
580
+ "rewards/chosen": -1.1458890438079834,
581
+ "rewards/margins": 0.6978408098220825,
582
+ "rewards/rejected": -1.8437299728393555,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.88,
587
+ "learning_rate": 1.2878971655412515e-07,
588
+ "logits/chosen": -0.15144512057304382,
589
+ "logits/rejected": -0.11361583322286606,
590
+ "logps/chosen": -396.65521240234375,
591
+ "logps/rejected": -473.33447265625,
592
+ "loss": 0.0429,
593
+ "rewards/accuracies": 0.6875,
594
+ "rewards/chosen": -1.0332143306732178,
595
+ "rewards/margins": 0.8435841798782349,
596
+ "rewards/rejected": -1.876798391342163,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.9,
601
+ "learning_rate": 8.797324247145411e-08,
602
+ "logits/chosen": -0.18158239126205444,
603
+ "logits/rejected": -0.16928087174892426,
604
+ "logps/chosen": -459.45928955078125,
605
+ "logps/rejected": -527.222412109375,
606
+ "loss": 0.0479,
607
+ "rewards/accuracies": 0.7437499761581421,
608
+ "rewards/chosen": -1.2538243532180786,
609
+ "rewards/margins": 0.7805837392807007,
610
+ "rewards/rejected": -2.0344080924987793,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.92,
615
+ "learning_rate": 5.472568979361853e-08,
616
+ "logits/chosen": -0.1764029711484909,
617
+ "logits/rejected": -0.1563466489315033,
618
+ "logps/chosen": -399.78094482421875,
619
+ "logps/rejected": -463.10284423828125,
620
+ "loss": 0.0473,
621
+ "rewards/accuracies": 0.6875,
622
+ "rewards/chosen": -1.1522372961044312,
623
+ "rewards/margins": 0.7010194063186646,
624
+ "rewards/rejected": -1.8532568216323853,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.94,
629
+ "learning_rate": 2.922527618666465e-08,
630
+ "logits/chosen": -0.23132136464118958,
631
+ "logits/rejected": -0.0833607167005539,
632
+ "logps/chosen": -476.85986328125,
633
+ "logps/rejected": -498.6736755371094,
634
+ "loss": 0.0442,
635
+ "rewards/accuracies": 0.7124999761581421,
636
+ "rewards/chosen": -1.269084095954895,
637
+ "rewards/margins": 0.8781474232673645,
638
+ "rewards/rejected": -2.1472315788269043,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.96,
643
+ "learning_rate": 1.1608692138469379e-08,
644
+ "logits/chosen": -0.17594662308692932,
645
+ "logits/rejected": 0.0026182211004197598,
646
+ "logps/chosen": -404.1844177246094,
647
+ "logps/rejected": -453.0740661621094,
648
+ "loss": 0.0409,
649
+ "rewards/accuracies": 0.7124999761581421,
650
+ "rewards/chosen": -1.0624682903289795,
651
+ "rewards/margins": 0.810712456703186,
652
+ "rewards/rejected": -1.8731807470321655,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.98,
657
+ "learning_rate": 1.970368253390198e-09,
658
+ "logits/chosen": -0.20819933712482452,
659
+ "logits/rejected": -0.09198556840419769,
660
+ "logps/chosen": -453.80291748046875,
661
+ "logps/rejected": -505.82733154296875,
662
+ "loss": 0.0462,
663
+ "rewards/accuracies": 0.75,
664
+ "rewards/chosen": -1.1465378999710083,
665
+ "rewards/margins": 0.9369093179702759,
666
+ "rewards/rejected": -2.083447217941284,
667
+ "step": 470
668
+ },
669
  {
670
  "epoch": 1.0,
671
+ "step": 477,
672
  "total_flos": 0.0,
673
+ "train_loss": 0.06463673111027891,
674
+ "train_runtime": 6408.5161,
675
+ "train_samples_per_second": 9.539,
676
+ "train_steps_per_second": 0.074
677
  }
678
  ],
679
  "logging_steps": 10,
680
+ "max_steps": 477,
681
  "num_train_epochs": 1,
682
  "save_steps": 10000,
683
  "total_flos": 0.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75be8a76f25532ae2230808d2c77d6bec171d0619c7c964b4a7bc7c485ab42d1
3
  size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bbe91eae1fd7d578def68ada6516b84b7a8f45cd2735b45f8b5198ffb913cb1
3
  size 6648