jikaixuan commited on
Commit
cccb458
1 Parent(s): e011256

Model save

Browse files
Files changed (6) hide show
  1. README.md +12 -12
  2. adapter_model.safetensors +1 -1
  3. all_results.json +17 -17
  4. eval_results.json +14 -14
  5. train_results.json +3 -3
  6. trainer_state.json +1026 -1026
README.md CHANGED
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.0758
19
- - Rewards/chosen: -15.6780
20
- - Rewards/rejected: -27.9661
21
- - Rewards/accuracies: 0.7080
22
- - Rewards/margins: 12.2881
23
- - Logps/rejected: -538.9609
24
- - Logps/chosen: -441.0378
25
- - Logits/rejected: -2.2748
26
- - Logits/chosen: -2.3871
27
- - Use Label: 2488.2161
28
- - Pred Label: 13543.7842
29
 
30
  ## Model description
31
 
@@ -62,7 +62,7 @@ The following hyperparameters were used during training:
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
- | 0.069 | 1.0 | 955 | 0.0758 | -15.6780 | -27.9661 | 0.7080 | 12.2881 | -538.9609 | -441.0378 | -2.2748 | -2.3871 | 2447.2161 | 13084.7842 |
66
 
67
 
68
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.2409
19
+ - Rewards/chosen: -2.7432
20
+ - Rewards/rejected: -6.3660
21
+ - Rewards/accuracies: 0.7340
22
+ - Rewards/margins: 3.6228
23
+ - Logps/rejected: -322.9595
24
+ - Logps/chosen: -311.6890
25
+ - Logits/rejected: -2.6650
26
+ - Logits/chosen: -2.6975
27
+ - Use Label: 6842.4238
28
+ - Pred Label: 9189.5762
29
 
30
  ## Model description
31
 
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
+ | 0.2135 | 1.0 | 955 | 0.2409 | -2.7432 | -6.3660 | 0.7340 | 3.6228 | -322.9595 | -311.6890 | -2.6650 | -2.6975 | 6698.4238 | 8833.5762 |
66
 
67
 
68
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca0c394a901dd04cd641751911338244418474e4f57005089094c0c24ab49e58
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca4e6feb10113e9de9b08081514996934a6b42d53e8760d60528af986c57ddb4
3
  size 218138576
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.3870656490325928,
4
- "eval_logits/rejected": -2.274824380874634,
5
- "eval_logps/chosen": -441.0377502441406,
6
- "eval_logps/rejected": -538.9608764648438,
7
- "eval_loss": 0.07583338022232056,
8
- "eval_pred_label": 13543.7841796875,
9
- "eval_rewards/accuracies": 0.7080000042915344,
10
- "eval_rewards/chosen": -15.678034782409668,
11
- "eval_rewards/margins": 12.288079261779785,
12
- "eval_rewards/rejected": -27.966114044189453,
13
- "eval_runtime": 449.5661,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.449,
16
- "eval_steps_per_second": 0.278,
17
- "eval_use_label": 2488.216064453125,
18
- "train_loss": 0.12920980815488006,
19
- "train_runtime": 25162.7962,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 2.43,
22
  "train_steps_per_second": 0.038
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.6974706649780273,
4
+ "eval_logits/rejected": -2.665019989013672,
5
+ "eval_logps/chosen": -311.6889953613281,
6
+ "eval_logps/rejected": -322.95947265625,
7
+ "eval_loss": 0.2408759742975235,
8
+ "eval_pred_label": 9189.576171875,
9
+ "eval_rewards/accuracies": 0.734000027179718,
10
+ "eval_rewards/chosen": -2.7431609630584717,
11
+ "eval_rewards/margins": 3.6228184700012207,
12
+ "eval_rewards/rejected": -6.36598014831543,
13
+ "eval_runtime": 452.5604,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.419,
16
+ "eval_steps_per_second": 0.276,
17
+ "eval_use_label": 6842.423828125,
18
+ "train_loss": 0.31699458866219243,
19
+ "train_runtime": 25218.7851,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 2.424,
22
  "train_steps_per_second": 0.038
23
  }
eval_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -2.3870656490325928,
4
- "eval_logits/rejected": -2.274824380874634,
5
- "eval_logps/chosen": -441.0377502441406,
6
- "eval_logps/rejected": -538.9608764648438,
7
- "eval_loss": 0.07583338022232056,
8
- "eval_pred_label": 13543.7841796875,
9
- "eval_rewards/accuracies": 0.7080000042915344,
10
- "eval_rewards/chosen": -15.678034782409668,
11
- "eval_rewards/margins": 12.288079261779785,
12
- "eval_rewards/rejected": -27.966114044189453,
13
- "eval_runtime": 449.5661,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.449,
16
- "eval_steps_per_second": 0.278,
17
- "eval_use_label": 2488.216064453125
18
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.6974706649780273,
4
+ "eval_logits/rejected": -2.665019989013672,
5
+ "eval_logps/chosen": -311.6889953613281,
6
+ "eval_logps/rejected": -322.95947265625,
7
+ "eval_loss": 0.2408759742975235,
8
+ "eval_pred_label": 9189.576171875,
9
+ "eval_rewards/accuracies": 0.734000027179718,
10
+ "eval_rewards/chosen": -2.7431609630584717,
11
+ "eval_rewards/margins": 3.6228184700012207,
12
+ "eval_rewards/rejected": -6.36598014831543,
13
+ "eval_runtime": 452.5604,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.419,
16
+ "eval_steps_per_second": 0.276,
17
+ "eval_use_label": 6842.423828125
18
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.12920980815488006,
4
- "train_runtime": 25162.7962,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.43,
7
  "train_steps_per_second": 0.038
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.31699458866219243,
4
+ "train_runtime": 25218.7851,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 2.424,
7
  "train_steps_per_second": 0.038
8
  }
trainer_state.json CHANGED
@@ -75,1500 +75,1500 @@
75
  {
76
  "epoch": 0.04,
77
  "learning_rate": 2.0833333333333336e-05,
78
- "logits/chosen": -2.841322422027588,
79
- "logits/rejected": -2.84962797164917,
80
- "logps/chosen": -281.39862060546875,
81
- "logps/rejected": -277.9919738769531,
82
- "loss": 0.6236,
83
- "pred_label": 0.875,
84
- "rewards/accuracies": 0.6812499761581421,
85
- "rewards/chosen": 0.01947467401623726,
86
- "rewards/margins": 0.21414700150489807,
87
- "rewards/rejected": -0.19467231631278992,
88
  "step": 40,
89
- "use_label": 561.125
90
  },
91
  {
92
  "epoch": 0.05,
93
  "learning_rate": 2.604166666666667e-05,
94
- "logits/chosen": -2.8523213863372803,
95
- "logits/rejected": -2.836536169052124,
96
- "logps/chosen": -267.1900329589844,
97
- "logps/rejected": -263.0260925292969,
98
- "loss": 0.5468,
99
- "pred_label": 18.174999237060547,
100
- "rewards/accuracies": 0.637499988079071,
101
- "rewards/chosen": -0.10816816240549088,
102
- "rewards/margins": 0.4027964472770691,
103
- "rewards/rejected": -0.5109646320343018,
104
  "step": 50,
105
- "use_label": 703.8250122070312
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.125e-05,
110
- "logits/chosen": -2.8082404136657715,
111
- "logits/rejected": -2.7957923412323,
112
- "logps/chosen": -302.7262878417969,
113
- "logps/rejected": -294.58624267578125,
114
- "loss": 0.4674,
115
- "pred_label": 66.9000015258789,
116
- "rewards/accuracies": 0.706250011920929,
117
- "rewards/chosen": -0.2311042845249176,
118
- "rewards/margins": 0.616974413394928,
119
- "rewards/rejected": -0.8480786085128784,
120
  "step": 60,
121
- "use_label": 815.0999755859375
122
  },
123
  {
124
  "epoch": 0.07,
125
  "learning_rate": 3.6458333333333336e-05,
126
- "logits/chosen": -2.8012547492980957,
127
- "logits/rejected": -2.788729429244995,
128
- "logps/chosen": -299.8072204589844,
129
- "logps/rejected": -288.31036376953125,
130
- "loss": 0.3802,
131
- "pred_label": 144.4499969482422,
132
- "rewards/accuracies": 0.737500011920929,
133
- "rewards/chosen": -0.5493733286857605,
134
- "rewards/margins": 0.9851129651069641,
135
- "rewards/rejected": -1.534486174583435,
136
  "step": 70,
137
- "use_label": 897.5499877929688
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.166666666666667e-05,
142
- "logits/chosen": -2.737917423248291,
143
- "logits/rejected": -2.7173783779144287,
144
- "logps/chosen": -318.4990234375,
145
- "logps/rejected": -307.38311767578125,
146
- "loss": 0.2535,
147
- "pred_label": 244.1999969482422,
148
- "rewards/accuracies": 0.7124999761581421,
149
- "rewards/chosen": -1.3193671703338623,
150
- "rewards/margins": 1.5176814794540405,
151
- "rewards/rejected": -2.8370487689971924,
152
  "step": 80,
153
- "use_label": 957.7999877929688
154
  },
155
  {
156
  "epoch": 0.09,
157
  "learning_rate": 4.6875e-05,
158
- "logits/chosen": -2.650569200515747,
159
- "logits/rejected": -2.7028753757476807,
160
- "logps/chosen": -316.5365295410156,
161
- "logps/rejected": -307.62799072265625,
162
- "loss": 0.2342,
163
- "pred_label": 363.79998779296875,
164
- "rewards/accuracies": 0.7250000238418579,
165
- "rewards/chosen": -2.0753350257873535,
166
- "rewards/margins": 2.060016632080078,
167
- "rewards/rejected": -4.135351657867432,
168
  "step": 90,
169
- "use_label": 998.2000122070312
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 4.976717112922003e-05,
174
- "logits/chosen": -2.664170265197754,
175
- "logits/rejected": -2.6609156131744385,
176
- "logps/chosen": -301.24493408203125,
177
- "logps/rejected": -345.8691101074219,
178
- "loss": 0.2337,
179
- "pred_label": 482.125,
180
- "rewards/accuracies": 0.6875,
181
- "rewards/chosen": -2.711317300796509,
182
- "rewards/margins": 2.521953582763672,
183
- "rewards/rejected": -5.23327112197876,
184
  "step": 100,
185
- "use_label": 1039.875
186
  },
187
  {
188
  "epoch": 0.12,
189
  "learning_rate": 4.918509895227008e-05,
190
- "logits/chosen": -2.6410791873931885,
191
- "logits/rejected": -2.587087631225586,
192
- "logps/chosen": -328.8195495605469,
193
- "logps/rejected": -354.773193359375,
194
- "loss": 0.1381,
195
- "pred_label": 612.7750244140625,
196
- "rewards/accuracies": 0.675000011920929,
197
- "rewards/chosen": -6.414492607116699,
198
- "rewards/margins": 4.145485877990723,
199
- "rewards/rejected": -10.559977531433105,
200
  "step": 110,
201
- "use_label": 1069.2249755859375
202
  },
203
  {
204
  "epoch": 0.13,
205
  "learning_rate": 4.860302677532014e-05,
206
- "logits/chosen": -2.485827684402466,
207
- "logits/rejected": -2.4543118476867676,
208
- "logps/chosen": -401.1512756347656,
209
- "logps/rejected": -415.895263671875,
210
- "loss": 0.1156,
211
- "pred_label": 747.8499755859375,
212
- "rewards/accuracies": 0.699999988079071,
213
- "rewards/chosen": -12.163839340209961,
214
- "rewards/margins": 4.657315254211426,
215
- "rewards/rejected": -16.821155548095703,
216
  "step": 120,
217
- "use_label": 1094.1500244140625
218
  },
219
  {
220
  "epoch": 0.14,
221
  "learning_rate": 4.80209545983702e-05,
222
- "logits/chosen": -2.586129665374756,
223
- "logits/rejected": -2.4943180084228516,
224
- "logps/chosen": -397.7897033691406,
225
- "logps/rejected": -390.68902587890625,
226
- "loss": 0.181,
227
- "pred_label": 880.6749877929688,
228
- "rewards/accuracies": 0.625,
229
- "rewards/chosen": -9.052229881286621,
230
- "rewards/margins": 2.705533742904663,
231
- "rewards/rejected": -11.757763862609863,
232
  "step": 130,
233
- "use_label": 1121.324951171875
234
  },
235
  {
236
  "epoch": 0.15,
237
  "learning_rate": 4.743888242142026e-05,
238
- "logits/chosen": -2.4943366050720215,
239
- "logits/rejected": -2.467221736907959,
240
- "logps/chosen": -360.2618103027344,
241
- "logps/rejected": -391.42254638671875,
242
- "loss": 0.1307,
243
- "pred_label": 1014.9000244140625,
244
- "rewards/accuracies": 0.6499999761581421,
245
- "rewards/chosen": -10.215502738952637,
246
- "rewards/margins": 5.424862384796143,
247
- "rewards/rejected": -15.640365600585938,
248
  "step": 140,
249
- "use_label": 1147.0999755859375
250
  },
251
  {
252
  "epoch": 0.16,
253
  "learning_rate": 4.685681024447032e-05,
254
- "logits/chosen": -2.448756456375122,
255
- "logits/rejected": -2.4686672687530518,
256
- "logps/chosen": -423.26348876953125,
257
- "logps/rejected": -462.45587158203125,
258
- "loss": 0.1291,
259
- "pred_label": 1152.449951171875,
260
  "rewards/accuracies": 0.612500011920929,
261
- "rewards/chosen": -12.837747573852539,
262
- "rewards/margins": 5.233094692230225,
263
- "rewards/rejected": -18.070842742919922,
264
  "step": 150,
265
- "use_label": 1169.550048828125
266
  },
267
  {
268
  "epoch": 0.17,
269
  "learning_rate": 4.6274738067520374e-05,
270
- "logits/chosen": -2.5502519607543945,
271
- "logits/rejected": -2.4940075874328613,
272
- "logps/chosen": -338.2818908691406,
273
- "logps/rejected": -399.33062744140625,
274
- "loss": 0.1221,
275
- "pred_label": 1286.300048828125,
276
- "rewards/accuracies": 0.699999988079071,
277
- "rewards/chosen": -7.589987754821777,
278
- "rewards/margins": 5.438824653625488,
279
- "rewards/rejected": -13.02881145477295,
280
  "step": 160,
281
- "use_label": 1195.699951171875
282
  },
283
  {
284
  "epoch": 0.18,
285
  "learning_rate": 4.5692665890570435e-05,
286
- "logits/chosen": -2.3367486000061035,
287
- "logits/rejected": -2.268026351928711,
288
- "logps/chosen": -608.579833984375,
289
- "logps/rejected": -566.7401123046875,
290
- "loss": 0.0863,
291
- "pred_label": 1421.699951171875,
292
- "rewards/accuracies": 0.637499988079071,
293
- "rewards/chosen": -31.210372924804688,
294
- "rewards/margins": 0.4991304278373718,
295
- "rewards/rejected": -31.70950698852539,
296
  "step": 170,
297
- "use_label": 1220.300048828125
298
  },
299
  {
300
  "epoch": 0.19,
301
  "learning_rate": 4.511059371362049e-05,
302
- "logits/chosen": -2.5058579444885254,
303
- "logits/rejected": -2.460585117340088,
304
- "logps/chosen": -369.10260009765625,
305
- "logps/rejected": -435.2815856933594,
306
- "loss": 0.1377,
307
- "pred_label": 1565.5,
308
- "rewards/accuracies": 0.6937500238418579,
309
- "rewards/chosen": -11.317340850830078,
310
- "rewards/margins": 5.142584800720215,
311
- "rewards/rejected": -16.459924697875977,
312
  "step": 180,
313
- "use_label": 1236.5
314
  },
315
  {
316
  "epoch": 0.2,
317
  "learning_rate": 4.452852153667055e-05,
318
- "logits/chosen": -2.689570665359497,
319
- "logits/rejected": -2.6224443912506104,
320
- "logps/chosen": -352.5258483886719,
321
- "logps/rejected": -360.037841796875,
322
- "loss": 0.1658,
323
- "pred_label": 1696.5,
324
- "rewards/accuracies": 0.78125,
325
- "rewards/chosen": -4.5269904136657715,
326
- "rewards/margins": 5.3725738525390625,
327
- "rewards/rejected": -9.899563789367676,
328
  "step": 190,
329
- "use_label": 1265.5
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 4.394644935972061e-05,
334
- "logits/chosen": -2.663055658340454,
335
- "logits/rejected": -2.6471009254455566,
336
- "logps/chosen": -329.58465576171875,
337
- "logps/rejected": -377.0941467285156,
338
- "loss": 0.1567,
339
- "pred_label": 1831.4000244140625,
340
- "rewards/accuracies": 0.668749988079071,
341
- "rewards/chosen": -6.82558536529541,
342
- "rewards/margins": 4.059788227081299,
343
- "rewards/rejected": -10.8853759765625,
344
  "step": 200,
345
- "use_label": 1290.5999755859375
346
  },
347
  {
348
  "epoch": 0.22,
349
  "learning_rate": 4.336437718277067e-05,
350
- "logits/chosen": -2.6221835613250732,
351
- "logits/rejected": -2.6028318405151367,
352
- "logps/chosen": -400.9311828613281,
353
- "logps/rejected": -399.74774169921875,
354
- "loss": 0.1038,
355
- "pred_label": 1967.925048828125,
356
- "rewards/accuracies": 0.6812499761581421,
357
- "rewards/chosen": -9.968328475952148,
358
- "rewards/margins": 4.587479591369629,
359
- "rewards/rejected": -14.555807113647461,
360
  "step": 210,
361
- "use_label": 1314.074951171875
362
  },
363
  {
364
  "epoch": 0.23,
365
  "learning_rate": 4.278230500582072e-05,
366
- "logits/chosen": -2.5327601432800293,
367
- "logits/rejected": -2.5170798301696777,
368
- "logps/chosen": -431.26373291015625,
369
- "logps/rejected": -491.82647705078125,
370
- "loss": 0.0838,
371
- "pred_label": 2115.125,
372
- "rewards/accuracies": 0.6937500238418579,
373
- "rewards/chosen": -15.627206802368164,
374
- "rewards/margins": 6.489752292633057,
375
- "rewards/rejected": -22.116960525512695,
376
  "step": 220,
377
- "use_label": 1326.875
378
  },
379
  {
380
  "epoch": 0.24,
381
  "learning_rate": 4.220023282887078e-05,
382
- "logits/chosen": -2.2676877975463867,
383
- "logits/rejected": -2.275028944015503,
384
- "logps/chosen": -759.7401123046875,
385
- "logps/rejected": -772.6349487304688,
386
- "loss": 0.0695,
387
- "pred_label": 2261.175048828125,
388
- "rewards/accuracies": 0.6187499761581421,
389
- "rewards/chosen": -45.156700134277344,
390
- "rewards/margins": 5.6958770751953125,
391
- "rewards/rejected": -50.852577209472656,
392
  "step": 230,
393
- "use_label": 1340.824951171875
394
  },
395
  {
396
  "epoch": 0.25,
397
  "learning_rate": 4.161816065192084e-05,
398
- "logits/chosen": -2.720881700515747,
399
- "logits/rejected": -2.667050361633301,
400
- "logps/chosen": -346.90936279296875,
401
- "logps/rejected": -372.1588439941406,
402
- "loss": 0.1159,
403
- "pred_label": 2400.875,
404
- "rewards/accuracies": 0.706250011920929,
405
- "rewards/chosen": -7.2331953048706055,
406
- "rewards/margins": 7.680712699890137,
407
- "rewards/rejected": -14.913908004760742,
408
  "step": 240,
409
- "use_label": 1361.125
410
  },
411
  {
412
  "epoch": 0.26,
413
  "learning_rate": 4.10360884749709e-05,
414
- "logits/chosen": -2.7150168418884277,
415
- "logits/rejected": -2.7007734775543213,
416
- "logps/chosen": -368.1573486328125,
417
- "logps/rejected": -409.0894470214844,
418
- "loss": 0.1438,
419
- "pred_label": 2528.425048828125,
420
- "rewards/accuracies": 0.7250000238418579,
421
- "rewards/chosen": -7.751723289489746,
422
- "rewards/margins": 6.2153401374816895,
423
- "rewards/rejected": -13.967063903808594,
424
  "step": 250,
425
- "use_label": 1393.574951171875
426
  },
427
  {
428
  "epoch": 0.27,
429
  "learning_rate": 4.045401629802096e-05,
430
- "logits/chosen": -2.7338109016418457,
431
- "logits/rejected": -2.7302541732788086,
432
- "logps/chosen": -320.41326904296875,
433
- "logps/rejected": -374.7262878417969,
434
- "loss": 0.1538,
435
- "pred_label": 2655.949951171875,
436
- "rewards/accuracies": 0.65625,
437
- "rewards/chosen": -7.180386543273926,
438
- "rewards/margins": 5.263332843780518,
439
- "rewards/rejected": -12.443718910217285,
440
  "step": 260,
441
- "use_label": 1426.050048828125
442
  },
443
  {
444
  "epoch": 0.28,
445
  "learning_rate": 3.9871944121071014e-05,
446
- "logits/chosen": -2.729078769683838,
447
- "logits/rejected": -2.7090542316436768,
448
- "logps/chosen": -375.9404296875,
449
- "logps/rejected": -433.7308654785156,
450
- "loss": 0.1174,
451
- "pred_label": 2794.300048828125,
452
- "rewards/accuracies": 0.78125,
453
- "rewards/chosen": -8.406911849975586,
454
- "rewards/margins": 7.4909772872924805,
455
- "rewards/rejected": -15.89788818359375,
456
  "step": 270,
457
- "use_label": 1447.699951171875
458
  },
459
  {
460
  "epoch": 0.29,
461
  "learning_rate": 3.928987194412107e-05,
462
- "logits/chosen": -2.7120089530944824,
463
- "logits/rejected": -2.690830707550049,
464
- "logps/chosen": -403.83233642578125,
465
- "logps/rejected": -437.89410400390625,
466
- "loss": 0.0926,
467
- "pred_label": 2935.449951171875,
468
- "rewards/accuracies": 0.737500011920929,
469
- "rewards/chosen": -10.986837387084961,
470
- "rewards/margins": 6.579653263092041,
471
- "rewards/rejected": -17.566490173339844,
472
  "step": 280,
473
- "use_label": 1466.550048828125
474
  },
475
  {
476
  "epoch": 0.3,
477
  "learning_rate": 3.870779976717113e-05,
478
- "logits/chosen": -2.637876033782959,
479
- "logits/rejected": -2.5659611225128174,
480
- "logps/chosen": -455.518310546875,
481
- "logps/rejected": -462.74267578125,
482
- "loss": 0.0968,
483
- "pred_label": 3076.324951171875,
484
- "rewards/accuracies": 0.6187499761581421,
485
- "rewards/chosen": -16.259593963623047,
486
- "rewards/margins": 6.993022918701172,
487
- "rewards/rejected": -23.25261688232422,
488
  "step": 290,
489
- "use_label": 1485.675048828125
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 3.812572759022119e-05,
494
- "logits/chosen": -2.5933048725128174,
495
- "logits/rejected": -2.5245001316070557,
496
- "logps/chosen": -432.7098083496094,
497
- "logps/rejected": -497.24554443359375,
498
- "loss": 0.0835,
499
- "pred_label": 3217.25,
500
- "rewards/accuracies": 0.6499999761581421,
501
- "rewards/chosen": -16.903099060058594,
502
- "rewards/margins": 8.837041854858398,
503
- "rewards/rejected": -25.74013900756836,
504
  "step": 300,
505
- "use_label": 1504.75
506
  },
507
  {
508
  "epoch": 0.32,
509
  "learning_rate": 3.7543655413271246e-05,
510
- "logits/chosen": -2.4902329444885254,
511
- "logits/rejected": -2.3905444145202637,
512
- "logps/chosen": -440.56085205078125,
513
- "logps/rejected": -660.7030029296875,
514
- "loss": 0.0981,
515
- "pred_label": 3356.77490234375,
516
- "rewards/accuracies": 0.7562500238418579,
517
- "rewards/chosen": -17.362918853759766,
518
- "rewards/margins": 19.8530330657959,
519
- "rewards/rejected": -37.21595764160156,
520
  "step": 310,
521
- "use_label": 1525.2249755859375
522
  },
523
  {
524
  "epoch": 0.33,
525
  "learning_rate": 3.696158323632131e-05,
526
- "logits/chosen": -2.2354841232299805,
527
- "logits/rejected": -2.1421380043029785,
528
- "logps/chosen": -747.2213134765625,
529
- "logps/rejected": -914.2220458984375,
530
- "loss": 0.0656,
531
- "pred_label": 3500.35009765625,
532
- "rewards/accuracies": 0.6625000238418579,
533
- "rewards/chosen": -45.124141693115234,
534
- "rewards/margins": 18.217256546020508,
535
- "rewards/rejected": -63.341407775878906,
536
  "step": 320,
537
- "use_label": 1541.6500244140625
538
  },
539
  {
540
  "epoch": 0.35,
541
  "learning_rate": 3.637951105937136e-05,
542
- "logits/chosen": -2.2868189811706543,
543
- "logits/rejected": -2.2072067260742188,
544
- "logps/chosen": -698.5914306640625,
545
- "logps/rejected": -843.9619140625,
546
- "loss": 0.0646,
547
- "pred_label": 3651.77490234375,
548
- "rewards/accuracies": 0.6187499761581421,
549
- "rewards/chosen": -42.7642936706543,
550
- "rewards/margins": 18.20369529724121,
551
- "rewards/rejected": -60.96799850463867,
552
  "step": 330,
553
- "use_label": 1550.2249755859375
554
  },
555
  {
556
  "epoch": 0.36,
557
  "learning_rate": 3.579743888242142e-05,
558
- "logits/chosen": -2.1754813194274902,
559
- "logits/rejected": -2.1737990379333496,
560
- "logps/chosen": -933.4059448242188,
561
- "logps/rejected": -1077.489013671875,
562
- "loss": 0.0609,
563
- "pred_label": 3802.02490234375,
564
- "rewards/accuracies": 0.581250011920929,
565
- "rewards/chosen": -64.42259216308594,
566
- "rewards/margins": 19.241281509399414,
567
- "rewards/rejected": -83.66387176513672,
568
  "step": 340,
569
- "use_label": 1559.9749755859375
570
  },
571
  {
572
  "epoch": 0.37,
573
  "learning_rate": 3.5215366705471484e-05,
574
- "logits/chosen": -2.3112852573394775,
575
- "logits/rejected": -2.2864885330200195,
576
- "logps/chosen": -736.1566162109375,
577
- "logps/rejected": -973.3111572265625,
578
- "loss": 0.0509,
579
- "pred_label": 3953.425048828125,
580
- "rewards/accuracies": 0.6875,
581
- "rewards/chosen": -43.89619064331055,
582
- "rewards/margins": 25.668701171875,
583
- "rewards/rejected": -69.56489562988281,
584
  "step": 350,
585
- "use_label": 1568.574951171875
586
  },
587
  {
588
  "epoch": 0.38,
589
  "learning_rate": 3.463329452852154e-05,
590
- "logits/chosen": -2.3861846923828125,
591
- "logits/rejected": -2.348050117492676,
592
- "logps/chosen": -612.4452514648438,
593
- "logps/rejected": -722.9779663085938,
594
- "loss": 0.0589,
595
- "pred_label": 4104.22509765625,
596
- "rewards/accuracies": 0.6312500238418579,
597
- "rewards/chosen": -33.965110778808594,
598
- "rewards/margins": 11.812593460083008,
599
- "rewards/rejected": -45.777706146240234,
600
  "step": 360,
601
- "use_label": 1577.7750244140625
602
  },
603
  {
604
  "epoch": 0.39,
605
  "learning_rate": 3.40512223515716e-05,
606
- "logits/chosen": -2.5135788917541504,
607
- "logits/rejected": -2.431811571121216,
608
- "logps/chosen": -424.55120849609375,
609
- "logps/rejected": -567.4149780273438,
610
- "loss": 0.0861,
611
- "pred_label": 4248.5498046875,
612
- "rewards/accuracies": 0.7124999761581421,
613
- "rewards/chosen": -14.834848403930664,
614
- "rewards/margins": 14.3344144821167,
615
- "rewards/rejected": -29.169261932373047,
616
  "step": 370,
617
- "use_label": 1593.449951171875
618
  },
619
  {
620
  "epoch": 0.4,
621
  "learning_rate": 3.3469150174621654e-05,
622
- "logits/chosen": -2.5888447761535645,
623
- "logits/rejected": -2.5503790378570557,
624
- "logps/chosen": -432.38995361328125,
625
- "logps/rejected": -449.0523986816406,
626
- "loss": 0.1037,
627
- "pred_label": 4388.47509765625,
628
- "rewards/accuracies": 0.699999988079071,
629
- "rewards/chosen": -14.715968132019043,
630
- "rewards/margins": 4.131407260894775,
631
- "rewards/rejected": -18.847375869750977,
632
  "step": 380,
633
- "use_label": 1613.5250244140625
634
  },
635
  {
636
  "epoch": 0.41,
637
  "learning_rate": 3.288707799767171e-05,
638
- "logits/chosen": -2.4642863273620605,
639
- "logits/rejected": -2.3922629356384277,
640
- "logps/chosen": -390.8463439941406,
641
- "logps/rejected": -461.17919921875,
642
- "loss": 0.1182,
643
- "pred_label": 4530.875,
644
- "rewards/accuracies": 0.731249988079071,
645
- "rewards/chosen": -10.805445671081543,
646
- "rewards/margins": 10.884265899658203,
647
- "rewards/rejected": -21.689708709716797,
648
  "step": 390,
649
- "use_label": 1631.125
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 3.2305005820721776e-05,
654
- "logits/chosen": -2.708742141723633,
655
- "logits/rejected": -2.668813943862915,
656
- "logps/chosen": -360.8929138183594,
657
- "logps/rejected": -357.1689758300781,
658
- "loss": 0.1227,
659
- "pred_label": 4662.125,
660
- "rewards/accuracies": 0.675000011920929,
661
- "rewards/chosen": -5.38419246673584,
662
- "rewards/margins": 4.3987908363342285,
663
- "rewards/rejected": -9.782983779907227,
664
  "step": 400,
665
- "use_label": 1659.875
666
  },
667
  {
668
  "epoch": 0.43,
669
  "learning_rate": 3.172293364377183e-05,
670
- "logits/chosen": -2.703439474105835,
671
- "logits/rejected": -2.6409201622009277,
672
- "logps/chosen": -355.69219970703125,
673
- "logps/rejected": -346.83416748046875,
674
- "loss": 0.1313,
675
- "pred_label": 4792.97509765625,
676
- "rewards/accuracies": 0.606249988079071,
677
- "rewards/chosen": -5.759453773498535,
678
- "rewards/margins": 3.0691254138946533,
679
- "rewards/rejected": -8.828579902648926,
680
  "step": 410,
681
- "use_label": 1689.0250244140625
682
  },
683
  {
684
  "epoch": 0.44,
685
  "learning_rate": 3.1140861466821885e-05,
686
- "logits/chosen": -2.617567539215088,
687
- "logits/rejected": -2.578650951385498,
688
- "logps/chosen": -307.09552001953125,
689
- "logps/rejected": -360.4197998046875,
690
- "loss": 0.1338,
691
- "pred_label": 4930.9501953125,
692
- "rewards/accuracies": 0.706250011920929,
693
- "rewards/chosen": -6.651512145996094,
694
- "rewards/margins": 4.47898006439209,
695
- "rewards/rejected": -11.130491256713867,
696
  "step": 420,
697
- "use_label": 1711.050048828125
698
  },
699
  {
700
  "epoch": 0.45,
701
  "learning_rate": 3.055878928987195e-05,
702
- "logits/chosen": -2.6378731727600098,
703
- "logits/rejected": -2.5847997665405273,
704
- "logps/chosen": -359.0061340332031,
705
- "logps/rejected": -389.7403564453125,
706
- "loss": 0.1136,
707
- "pred_label": 5066.6748046875,
708
- "rewards/accuracies": 0.668749988079071,
709
- "rewards/chosen": -8.110578536987305,
710
- "rewards/margins": 4.911756992340088,
711
- "rewards/rejected": -13.02233600616455,
712
  "step": 430,
713
- "use_label": 1735.324951171875
714
  },
715
  {
716
  "epoch": 0.46,
717
  "learning_rate": 2.9976717112922005e-05,
718
- "logits/chosen": -2.705080509185791,
719
- "logits/rejected": -2.6582016944885254,
720
- "logps/chosen": -361.77703857421875,
721
- "logps/rejected": -369.16888427734375,
722
- "loss": 0.1282,
723
- "pred_label": 5203.0498046875,
724
- "rewards/accuracies": 0.65625,
725
- "rewards/chosen": -7.850671291351318,
726
- "rewards/margins": 3.7665488719940186,
727
- "rewards/rejected": -11.617219924926758,
728
  "step": 440,
729
- "use_label": 1758.949951171875
730
  },
731
  {
732
  "epoch": 0.47,
733
  "learning_rate": 2.939464493597206e-05,
734
- "logits/chosen": -2.387315273284912,
735
- "logits/rejected": -2.332853317260742,
736
- "logps/chosen": -343.62481689453125,
737
- "logps/rejected": -404.4623107910156,
738
- "loss": 0.1146,
739
- "pred_label": 5345.10009765625,
740
- "rewards/accuracies": 0.637499988079071,
741
- "rewards/chosen": -11.390606880187988,
742
- "rewards/margins": 6.090449333190918,
743
- "rewards/rejected": -17.481056213378906,
744
  "step": 450,
745
- "use_label": 1776.9000244140625
746
  },
747
  {
748
  "epoch": 0.48,
749
  "learning_rate": 2.881257275902212e-05,
750
- "logits/chosen": -2.5489556789398193,
751
- "logits/rejected": -2.5125844478607178,
752
- "logps/chosen": -429.1163024902344,
753
- "logps/rejected": -433.0736389160156,
754
- "loss": 0.1077,
755
- "pred_label": 5487.8251953125,
756
- "rewards/accuracies": 0.6499999761581421,
757
- "rewards/chosen": -12.731410026550293,
758
- "rewards/margins": 3.6760268211364746,
759
- "rewards/rejected": -16.40743637084961,
760
  "step": 460,
761
- "use_label": 1794.175048828125
762
  },
763
  {
764
  "epoch": 0.49,
765
  "learning_rate": 2.8230500582072178e-05,
766
- "logits/chosen": -2.482365369796753,
767
- "logits/rejected": -2.3831024169921875,
768
- "logps/chosen": -389.73785400390625,
769
- "logps/rejected": -430.03973388671875,
770
- "loss": 0.0982,
771
- "pred_label": 5632.7998046875,
772
- "rewards/accuracies": 0.65625,
773
- "rewards/chosen": -12.2637357711792,
774
- "rewards/margins": 5.055985450744629,
775
- "rewards/rejected": -17.319721221923828,
776
  "step": 470,
777
- "use_label": 1809.199951171875
778
  },
779
  {
780
  "epoch": 0.5,
781
  "learning_rate": 2.7648428405122233e-05,
782
- "logits/chosen": -2.414053201675415,
783
- "logits/rejected": -2.3774731159210205,
784
- "logps/chosen": -391.9467468261719,
785
- "logps/rejected": -501.77960205078125,
786
- "loss": 0.094,
787
- "pred_label": 5778.2001953125,
788
- "rewards/accuracies": 0.6625000238418579,
789
- "rewards/chosen": -14.637247085571289,
790
- "rewards/margins": 10.688766479492188,
791
- "rewards/rejected": -25.32601547241211,
792
  "step": 480,
793
- "use_label": 1823.800048828125
794
  },
795
  {
796
  "epoch": 0.51,
797
  "learning_rate": 2.7066356228172297e-05,
798
- "logits/chosen": -2.541294574737549,
799
- "logits/rejected": -2.4439327716827393,
800
- "logps/chosen": -361.71771240234375,
801
- "logps/rejected": -378.765869140625,
802
- "loss": 0.0927,
803
- "pred_label": 5916.3251953125,
804
- "rewards/accuracies": 0.699999988079071,
805
- "rewards/chosen": -9.4688720703125,
806
- "rewards/margins": 4.668297290802002,
807
- "rewards/rejected": -14.137168884277344,
808
  "step": 490,
809
- "use_label": 1845.675048828125
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 2.6484284051222352e-05,
814
- "logits/chosen": -2.6095941066741943,
815
- "logits/rejected": -2.4995357990264893,
816
- "logps/chosen": -364.07427978515625,
817
- "logps/rejected": -420.4052734375,
818
- "loss": 0.1016,
819
- "pred_label": 6059.4501953125,
820
- "rewards/accuracies": 0.71875,
821
- "rewards/chosen": -7.4617791175842285,
822
- "rewards/margins": 8.752523422241211,
823
- "rewards/rejected": -16.214303970336914,
824
  "step": 500,
825
- "use_label": 1862.550048828125
826
  },
827
  {
828
  "epoch": 0.53,
829
  "learning_rate": 2.590221187427241e-05,
830
- "logits/chosen": -2.570039749145508,
831
- "logits/rejected": -2.480402946472168,
832
- "logps/chosen": -391.964111328125,
833
- "logps/rejected": -434.7195739746094,
834
- "loss": 0.1051,
835
- "pred_label": 6202.64990234375,
836
- "rewards/accuracies": 0.6625000238418579,
837
- "rewards/chosen": -11.662691116333008,
838
- "rewards/margins": 8.459746360778809,
839
- "rewards/rejected": -20.1224365234375,
840
  "step": 510,
841
- "use_label": 1879.3499755859375
842
  },
843
  {
844
  "epoch": 0.54,
845
  "learning_rate": 2.532013969732247e-05,
846
- "logits/chosen": -2.4564194679260254,
847
- "logits/rejected": -2.381622076034546,
848
- "logps/chosen": -406.453125,
849
- "logps/rejected": -494.87664794921875,
850
- "loss": 0.068,
851
- "pred_label": 6340.1748046875,
852
- "rewards/accuracies": 0.7250000238418579,
853
- "rewards/chosen": -11.449905395507812,
854
- "rewards/margins": 11.066286087036133,
855
- "rewards/rejected": -22.516191482543945,
856
  "step": 520,
857
- "use_label": 1901.824951171875
858
  },
859
  {
860
  "epoch": 0.55,
861
  "learning_rate": 2.4738067520372525e-05,
862
- "logits/chosen": -2.3373048305511475,
863
- "logits/rejected": -2.183617115020752,
864
- "logps/chosen": -547.97314453125,
865
- "logps/rejected": -691.542236328125,
866
- "loss": 0.0709,
867
- "pred_label": 6487.5498046875,
868
- "rewards/accuracies": 0.643750011920929,
869
- "rewards/chosen": -24.765727996826172,
870
- "rewards/margins": 14.99103832244873,
871
- "rewards/rejected": -39.75676727294922,
872
  "step": 530,
873
- "use_label": 1914.449951171875
874
  },
875
  {
876
  "epoch": 0.57,
877
  "learning_rate": 2.4155995343422587e-05,
878
- "logits/chosen": -2.211622714996338,
879
- "logits/rejected": -2.0457851886749268,
880
- "logps/chosen": -632.00634765625,
881
- "logps/rejected": -867.4461059570312,
882
- "loss": 0.05,
883
- "pred_label": 6636.0498046875,
884
- "rewards/accuracies": 0.7250000238418579,
885
- "rewards/chosen": -34.34697723388672,
886
- "rewards/margins": 25.800724029541016,
887
- "rewards/rejected": -60.14769744873047,
888
  "step": 540,
889
- "use_label": 1925.949951171875
890
  },
891
  {
892
  "epoch": 0.58,
893
  "learning_rate": 2.3573923166472644e-05,
894
- "logits/chosen": -2.088925838470459,
895
- "logits/rejected": -1.9701734781265259,
896
- "logps/chosen": -796.8790893554688,
897
- "logps/rejected": -983.8599853515625,
898
- "loss": 0.045,
899
- "pred_label": 6784.7998046875,
900
- "rewards/accuracies": 0.675000011920929,
901
- "rewards/chosen": -50.76759338378906,
902
- "rewards/margins": 21.424175262451172,
903
- "rewards/rejected": -72.19176483154297,
904
  "step": 550,
905
- "use_label": 1937.199951171875
906
  },
907
  {
908
  "epoch": 0.59,
909
  "learning_rate": 2.2991850989522702e-05,
910
- "logits/chosen": -2.2610983848571777,
911
- "logits/rejected": -2.1500396728515625,
912
- "logps/chosen": -535.4710693359375,
913
- "logps/rejected": -635.5113525390625,
914
- "loss": 0.067,
915
- "pred_label": 6936.47509765625,
916
- "rewards/accuracies": 0.6875,
917
- "rewards/chosen": -26.10666847229004,
918
- "rewards/margins": 15.695854187011719,
919
- "rewards/rejected": -41.802528381347656,
920
  "step": 560,
921
- "use_label": 1945.5250244140625
922
  },
923
  {
924
  "epoch": 0.6,
925
  "learning_rate": 2.240977881257276e-05,
926
- "logits/chosen": -2.3317577838897705,
927
- "logits/rejected": -2.215259313583374,
928
- "logps/chosen": -497.422119140625,
929
- "logps/rejected": -631.938232421875,
930
- "loss": 0.0596,
931
- "pred_label": 7086.02490234375,
932
  "rewards/accuracies": 0.706250011920929,
933
- "rewards/chosen": -22.793350219726562,
934
- "rewards/margins": 14.197186470031738,
935
- "rewards/rejected": -36.99053955078125,
936
  "step": 570,
937
- "use_label": 1955.9749755859375
938
  },
939
  {
940
  "epoch": 0.61,
941
  "learning_rate": 2.1827706635622818e-05,
942
- "logits/chosen": -2.399937152862549,
943
- "logits/rejected": -2.2866618633270264,
944
- "logps/chosen": -441.74090576171875,
945
- "logps/rejected": -541.2008666992188,
946
- "loss": 0.0784,
947
- "pred_label": 7230.875,
948
- "rewards/accuracies": 0.699999988079071,
949
- "rewards/chosen": -16.363243103027344,
950
- "rewards/margins": 11.310129165649414,
951
- "rewards/rejected": -27.67337417602539,
952
  "step": 580,
953
- "use_label": 1971.125
954
  },
955
  {
956
  "epoch": 0.62,
957
  "learning_rate": 2.124563445867288e-05,
958
- "logits/chosen": -2.3154327869415283,
959
- "logits/rejected": -2.2939510345458984,
960
- "logps/chosen": -400.68572998046875,
961
- "logps/rejected": -483.30279541015625,
962
- "loss": 0.089,
963
- "pred_label": 7372.14990234375,
964
- "rewards/accuracies": 0.6812499761581421,
965
- "rewards/chosen": -12.908785820007324,
966
- "rewards/margins": 7.343487739562988,
967
- "rewards/rejected": -20.252273559570312,
968
  "step": 590,
969
- "use_label": 1989.8499755859375
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 2.0663562281722934e-05,
974
- "logits/chosen": -2.463757276535034,
975
- "logits/rejected": -2.3977725505828857,
976
- "logps/chosen": -393.55133056640625,
977
- "logps/rejected": -500.068359375,
978
- "loss": 0.1022,
979
- "pred_label": 7510.75,
980
- "rewards/accuracies": 0.7437499761581421,
981
- "rewards/chosen": -8.018844604492188,
982
- "rewards/margins": 11.711953163146973,
983
- "rewards/rejected": -19.730796813964844,
984
  "step": 600,
985
- "use_label": 2011.25
986
  },
987
  {
988
  "epoch": 0.64,
989
  "learning_rate": 2.0081490104772992e-05,
990
- "logits/chosen": -2.409186840057373,
991
- "logits/rejected": -2.3435616493225098,
992
- "logps/chosen": -412.2386779785156,
993
- "logps/rejected": -426.6654357910156,
994
- "loss": 0.0856,
995
- "pred_label": 7652.7001953125,
996
- "rewards/accuracies": 0.699999988079071,
997
- "rewards/chosen": -10.632938385009766,
998
- "rewards/margins": 8.070967674255371,
999
- "rewards/rejected": -18.703907012939453,
1000
  "step": 610,
1001
- "use_label": 2029.300048828125
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
  "learning_rate": 1.9499417927823053e-05,
1006
- "logits/chosen": -2.4093079566955566,
1007
- "logits/rejected": -2.260230779647827,
1008
- "logps/chosen": -429.3250427246094,
1009
- "logps/rejected": -516.5551147460938,
1010
- "loss": 0.0781,
1011
- "pred_label": 7797.64990234375,
1012
- "rewards/accuracies": 0.7562500238418579,
1013
- "rewards/chosen": -14.934354782104492,
1014
- "rewards/margins": 11.502239227294922,
1015
- "rewards/rejected": -26.436594009399414,
1016
  "step": 620,
1017
- "use_label": 2044.3499755859375
1018
  },
1019
  {
1020
  "epoch": 0.66,
1021
  "learning_rate": 1.8917345750873107e-05,
1022
- "logits/chosen": -2.2741165161132812,
1023
- "logits/rejected": -2.1207118034362793,
1024
- "logps/chosen": -464.8804626464844,
1025
- "logps/rejected": -637.3003540039062,
1026
- "loss": 0.0665,
1027
- "pred_label": 7949.27490234375,
1028
- "rewards/accuracies": 0.7562500238418579,
1029
- "rewards/chosen": -17.97607421875,
1030
- "rewards/margins": 19.63665199279785,
1031
- "rewards/rejected": -37.61272430419922,
1032
  "step": 630,
1033
- "use_label": 2052.72509765625
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
  "learning_rate": 1.833527357392317e-05,
1038
- "logits/chosen": -2.1046862602233887,
1039
- "logits/rejected": -1.9926544427871704,
1040
- "logps/chosen": -479.60479736328125,
1041
- "logps/rejected": -621.544921875,
1042
- "loss": 0.0622,
1043
- "pred_label": 8098.72509765625,
1044
- "rewards/accuracies": 0.699999988079071,
1045
- "rewards/chosen": -22.641353607177734,
1046
- "rewards/margins": 14.655824661254883,
1047
- "rewards/rejected": -37.29717254638672,
1048
  "step": 640,
1049
- "use_label": 2063.27490234375
1050
  },
1051
  {
1052
  "epoch": 0.68,
1053
  "learning_rate": 1.7753201396973227e-05,
1054
- "logits/chosen": -2.304948329925537,
1055
- "logits/rejected": -2.159106731414795,
1056
- "logps/chosen": -497.8910217285156,
1057
- "logps/rejected": -595.5516967773438,
1058
- "loss": 0.0522,
1059
- "pred_label": 8248.5,
1060
- "rewards/accuracies": 0.737500011920929,
1061
- "rewards/chosen": -19.594234466552734,
1062
- "rewards/margins": 12.16260814666748,
1063
- "rewards/rejected": -31.7568416595459,
1064
  "step": 650,
1065
- "use_label": 2073.5
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
  "learning_rate": 1.717112922002328e-05,
1070
- "logits/chosen": -2.3109021186828613,
1071
- "logits/rejected": -2.2408225536346436,
1072
- "logps/chosen": -490.71112060546875,
1073
- "logps/rejected": -506.54766845703125,
1074
- "loss": 0.087,
1075
- "pred_label": 8398.224609375,
1076
- "rewards/accuracies": 0.637499988079071,
1077
- "rewards/chosen": -19.534738540649414,
1078
- "rewards/margins": 6.132468223571777,
1079
- "rewards/rejected": -25.667205810546875,
1080
  "step": 660,
1081
- "use_label": 2083.77490234375
1082
  },
1083
  {
1084
  "epoch": 0.7,
1085
  "learning_rate": 1.6589057043073342e-05,
1086
- "logits/chosen": -2.2374701499938965,
1087
- "logits/rejected": -2.11650013923645,
1088
- "logps/chosen": -484.6102600097656,
1089
- "logps/rejected": -569.1256103515625,
1090
- "loss": 0.064,
1091
- "pred_label": 8546.0,
1092
- "rewards/accuracies": 0.7250000238418579,
1093
- "rewards/chosen": -20.35372543334961,
1094
- "rewards/margins": 12.419096946716309,
1095
- "rewards/rejected": -32.772823333740234,
1096
  "step": 670,
1097
- "use_label": 2096.0
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
  "learning_rate": 1.60069848661234e-05,
1102
- "logits/chosen": -2.3477349281311035,
1103
- "logits/rejected": -2.2388532161712646,
1104
- "logps/chosen": -479.6822814941406,
1105
- "logps/rejected": -505.4498596191406,
1106
- "loss": 0.0759,
1107
- "pred_label": 8695.724609375,
1108
- "rewards/accuracies": 0.6187499761581421,
1109
- "rewards/chosen": -19.274688720703125,
1110
- "rewards/margins": 5.44730281829834,
1111
- "rewards/rejected": -24.72199058532715,
1112
  "step": 680,
1113
- "use_label": 2106.27490234375
1114
  },
1115
  {
1116
  "epoch": 0.72,
1117
  "learning_rate": 1.5424912689173458e-05,
1118
- "logits/chosen": -2.179953098297119,
1119
- "logits/rejected": -2.0111422538757324,
1120
- "logps/chosen": -476.6915588378906,
1121
- "logps/rejected": -548.3240966796875,
1122
- "loss": 0.0593,
1123
- "pred_label": 8846.5,
1124
- "rewards/accuracies": 0.7124999761581421,
1125
- "rewards/chosen": -22.6307430267334,
1126
- "rewards/margins": 11.273223876953125,
1127
- "rewards/rejected": -33.903968811035156,
1128
  "step": 690,
1129
- "use_label": 2115.5
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 1.4842840512223516e-05,
1134
- "logits/chosen": -2.298417329788208,
1135
- "logits/rejected": -2.1161131858825684,
1136
- "logps/chosen": -477.685546875,
1137
- "logps/rejected": -721.3785400390625,
1138
- "loss": 0.0546,
1139
- "pred_label": 8998.650390625,
1140
- "rewards/accuracies": 0.6937500238418579,
1141
- "rewards/chosen": -20.19955062866211,
1142
- "rewards/margins": 27.11043930053711,
1143
- "rewards/rejected": -47.30998992919922,
1144
  "step": 700,
1145
- "use_label": 2123.35009765625
1146
  },
1147
  {
1148
  "epoch": 0.74,
1149
  "learning_rate": 1.4260768335273575e-05,
1150
- "logits/chosen": -2.19868803024292,
1151
- "logits/rejected": -2.0781731605529785,
1152
- "logps/chosen": -629.6842041015625,
1153
- "logps/rejected": -780.2570190429688,
1154
- "loss": 0.0661,
1155
- "pred_label": 9147.0498046875,
1156
- "rewards/accuracies": 0.6937500238418579,
1157
- "rewards/chosen": -34.010963439941406,
1158
- "rewards/margins": 19.887874603271484,
1159
- "rewards/rejected": -53.898834228515625,
1160
  "step": 710,
1161
- "use_label": 2134.949951171875
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
  "learning_rate": 1.3678696158323633e-05,
1166
- "logits/chosen": -2.2372653484344482,
1167
- "logits/rejected": -2.1191489696502686,
1168
- "logps/chosen": -515.64208984375,
1169
- "logps/rejected": -691.5008544921875,
1170
- "loss": 0.0704,
1171
- "pred_label": 9300.599609375,
1172
- "rewards/accuracies": 0.7250000238418579,
1173
- "rewards/chosen": -24.41898536682129,
1174
- "rewards/margins": 19.390825271606445,
1175
- "rewards/rejected": -43.809810638427734,
1176
  "step": 720,
1177
- "use_label": 2141.39990234375
1178
  },
1179
  {
1180
  "epoch": 0.76,
1181
  "learning_rate": 1.309662398137369e-05,
1182
- "logits/chosen": -2.2743449211120605,
1183
- "logits/rejected": -2.1811537742614746,
1184
- "logps/chosen": -521.6466064453125,
1185
- "logps/rejected": -568.2872314453125,
1186
- "loss": 0.0724,
1187
- "pred_label": 9445.150390625,
1188
- "rewards/accuracies": 0.675000011920929,
1189
- "rewards/chosen": -22.4196720123291,
1190
- "rewards/margins": 10.242195129394531,
1191
- "rewards/rejected": -32.661869049072266,
1192
  "step": 730,
1193
- "use_label": 2156.85009765625
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
  "learning_rate": 1.2514551804423749e-05,
1198
- "logits/chosen": -2.2904646396636963,
1199
- "logits/rejected": -2.2049851417541504,
1200
- "logps/chosen": -520.117919921875,
1201
- "logps/rejected": -695.0534057617188,
1202
- "loss": 0.0636,
1203
- "pred_label": 9594.025390625,
1204
- "rewards/accuracies": 0.706250011920929,
1205
- "rewards/chosen": -21.274608612060547,
1206
- "rewards/margins": 19.581838607788086,
1207
- "rewards/rejected": -40.8564453125,
1208
  "step": 740,
1209
- "use_label": 2167.97509765625
1210
  },
1211
  {
1212
  "epoch": 0.79,
1213
  "learning_rate": 1.1932479627473807e-05,
1214
- "logits/chosen": -2.25730562210083,
1215
- "logits/rejected": -2.1175169944763184,
1216
- "logps/chosen": -540.2493896484375,
1217
- "logps/rejected": -738.2742919921875,
1218
- "loss": 0.063,
1219
- "pred_label": 9744.1748046875,
1220
- "rewards/accuracies": 0.7124999761581421,
1221
- "rewards/chosen": -27.976119995117188,
1222
- "rewards/margins": 19.7985782623291,
1223
- "rewards/rejected": -47.77469253540039,
1224
  "step": 750,
1225
- "use_label": 2177.824951171875
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
  "learning_rate": 1.1350407450523866e-05,
1230
- "logits/chosen": -2.3123764991760254,
1231
- "logits/rejected": -2.2530713081359863,
1232
- "logps/chosen": -507.7060546875,
1233
- "logps/rejected": -641.7669067382812,
1234
- "loss": 0.0539,
1235
- "pred_label": 9894.150390625,
1236
- "rewards/accuracies": 0.7124999761581421,
1237
- "rewards/chosen": -21.224315643310547,
1238
- "rewards/margins": 16.741058349609375,
1239
- "rewards/rejected": -37.96537780761719,
1240
  "step": 760,
1241
- "use_label": 2187.85009765625
1242
  },
1243
  {
1244
  "epoch": 0.81,
1245
  "learning_rate": 1.0768335273573923e-05,
1246
- "logits/chosen": -2.314664363861084,
1247
- "logits/rejected": -2.259186029434204,
1248
- "logps/chosen": -518.42333984375,
1249
- "logps/rejected": -678.8192749023438,
1250
- "loss": 0.0433,
1251
- "pred_label": 10047.875,
1252
- "rewards/accuracies": 0.7124999761581421,
1253
- "rewards/chosen": -21.612756729125977,
1254
- "rewards/margins": 17.79681396484375,
1255
- "rewards/rejected": -39.409568786621094,
1256
  "step": 770,
1257
- "use_label": 2194.125
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
  "learning_rate": 1.0186263096623982e-05,
1262
- "logits/chosen": -2.287078380584717,
1263
- "logits/rejected": -2.199553966522217,
1264
- "logps/chosen": -530.3980102539062,
1265
- "logps/rejected": -631.2974243164062,
1266
- "loss": 0.051,
1267
- "pred_label": 10198.8251953125,
1268
- "rewards/accuracies": 0.6812499761581421,
1269
- "rewards/chosen": -24.92042350769043,
1270
- "rewards/margins": 13.495773315429688,
1271
- "rewards/rejected": -38.41619873046875,
1272
  "step": 780,
1273
- "use_label": 2203.175048828125
1274
  },
1275
  {
1276
  "epoch": 0.83,
1277
  "learning_rate": 9.60419091967404e-06,
1278
- "logits/chosen": -2.3670148849487305,
1279
- "logits/rejected": -2.2450852394104004,
1280
- "logps/chosen": -533.3482666015625,
1281
- "logps/rejected": -667.7590942382812,
1282
- "loss": 0.0574,
1283
- "pred_label": 10346.0,
1284
- "rewards/accuracies": 0.6499999761581421,
1285
- "rewards/chosen": -26.55388832092285,
1286
- "rewards/margins": 14.149024963378906,
1287
- "rewards/rejected": -40.702919006347656,
1288
  "step": 790,
1289
- "use_label": 2216.0
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 9.022118742724098e-06,
1294
- "logits/chosen": -2.2852892875671387,
1295
- "logits/rejected": -2.191178798675537,
1296
- "logps/chosen": -515.4843139648438,
1297
- "logps/rejected": -658.4345703125,
1298
- "loss": 0.0732,
1299
- "pred_label": 10492.0,
1300
- "rewards/accuracies": 0.75,
1301
- "rewards/chosen": -20.101245880126953,
1302
- "rewards/margins": 18.136005401611328,
1303
- "rewards/rejected": -38.237247467041016,
1304
  "step": 800,
1305
- "use_label": 2230.0
1306
  },
1307
  {
1308
  "epoch": 0.85,
1309
  "learning_rate": 8.440046565774158e-06,
1310
- "logits/chosen": -2.308079481124878,
1311
- "logits/rejected": -2.2213778495788574,
1312
- "logps/chosen": -485.36602783203125,
1313
- "logps/rejected": -505.19189453125,
1314
- "loss": 0.0907,
1315
- "pred_label": 10638.150390625,
1316
- "rewards/accuracies": 0.6312500238418579,
1317
- "rewards/chosen": -20.434207916259766,
1318
- "rewards/margins": 7.203681945800781,
1319
- "rewards/rejected": -27.637889862060547,
1320
  "step": 810,
1321
- "use_label": 2243.85009765625
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
  "learning_rate": 7.857974388824214e-06,
1326
- "logits/chosen": -2.3282344341278076,
1327
- "logits/rejected": -2.243950366973877,
1328
- "logps/chosen": -473.38262939453125,
1329
- "logps/rejected": -566.6471557617188,
1330
- "loss": 0.0883,
1331
- "pred_label": 10780.900390625,
1332
- "rewards/accuracies": 0.706250011920929,
1333
- "rewards/chosen": -18.006444931030273,
1334
- "rewards/margins": 14.502110481262207,
1335
- "rewards/rejected": -32.5085563659668,
1336
  "step": 820,
1337
- "use_label": 2261.10009765625
1338
  },
1339
  {
1340
  "epoch": 0.87,
1341
  "learning_rate": 7.275902211874273e-06,
1342
- "logits/chosen": -2.345092296600342,
1343
- "logits/rejected": -2.301511287689209,
1344
- "logps/chosen": -431.64520263671875,
1345
- "logps/rejected": -632.6573486328125,
1346
- "loss": 0.0699,
1347
- "pred_label": 10927.275390625,
1348
- "rewards/accuracies": 0.75,
1349
- "rewards/chosen": -16.88318634033203,
1350
- "rewards/margins": 18.3623104095459,
1351
- "rewards/rejected": -35.24549865722656,
1352
  "step": 830,
1353
- "use_label": 2274.72509765625
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
  "learning_rate": 6.693830034924331e-06,
1358
- "logits/chosen": -2.3140549659729004,
1359
- "logits/rejected": -2.3112993240356445,
1360
- "logps/chosen": -505.6224670410156,
1361
- "logps/rejected": -544.6993408203125,
1362
- "loss": 0.074,
1363
- "pred_label": 11074.5498046875,
1364
- "rewards/accuracies": 0.6499999761581421,
1365
- "rewards/chosen": -23.76053237915039,
1366
- "rewards/margins": 4.261009216308594,
1367
- "rewards/rejected": -28.02153968811035,
1368
  "step": 840,
1369
- "use_label": 2287.449951171875
1370
  },
1371
  {
1372
  "epoch": 0.89,
1373
  "learning_rate": 6.111757857974389e-06,
1374
- "logits/chosen": -2.3638854026794434,
1375
- "logits/rejected": -2.3075807094573975,
1376
- "logps/chosen": -458.67181396484375,
1377
- "logps/rejected": -516.2307739257812,
1378
- "loss": 0.0849,
1379
- "pred_label": 11220.900390625,
1380
- "rewards/accuracies": 0.7562500238418579,
1381
- "rewards/chosen": -16.364498138427734,
1382
- "rewards/margins": 10.922462463378906,
1383
- "rewards/rejected": -27.28696060180664,
1384
  "step": 850,
1385
- "use_label": 2301.10009765625
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
  "learning_rate": 5.529685681024447e-06,
1390
- "logits/chosen": -2.3312666416168213,
1391
- "logits/rejected": -2.2601587772369385,
1392
- "logps/chosen": -464.119140625,
1393
- "logps/rejected": -580.7103881835938,
1394
- "loss": 0.0727,
1395
- "pred_label": 11369.724609375,
1396
- "rewards/accuracies": 0.706250011920929,
1397
- "rewards/chosen": -17.923574447631836,
1398
- "rewards/margins": 12.89836597442627,
1399
- "rewards/rejected": -30.821941375732422,
1400
  "step": 860,
1401
- "use_label": 2312.27490234375
1402
  },
1403
  {
1404
  "epoch": 0.91,
1405
  "learning_rate": 4.947613504074506e-06,
1406
- "logits/chosen": -2.3686623573303223,
1407
- "logits/rejected": -2.2975738048553467,
1408
- "logps/chosen": -374.78192138671875,
1409
- "logps/rejected": -505.87152099609375,
1410
- "loss": 0.0739,
1411
- "pred_label": 11518.5751953125,
1412
- "rewards/accuracies": 0.731249988079071,
1413
- "rewards/chosen": -13.179441452026367,
1414
- "rewards/margins": 13.382822036743164,
1415
- "rewards/rejected": -26.562265396118164,
1416
  "step": 870,
1417
- "use_label": 2323.425048828125
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
  "learning_rate": 4.3655413271245635e-06,
1422
- "logits/chosen": -2.3770673274993896,
1423
- "logits/rejected": -2.2960658073425293,
1424
- "logps/chosen": -470.25439453125,
1425
- "logps/rejected": -548.5438842773438,
1426
- "loss": 0.0668,
1427
- "pred_label": 11666.25,
1428
- "rewards/accuracies": 0.7437499761581421,
1429
- "rewards/chosen": -18.366519927978516,
1430
- "rewards/margins": 10.322815895080566,
1431
- "rewards/rejected": -28.689334869384766,
1432
  "step": 880,
1433
- "use_label": 2335.75
1434
  },
1435
  {
1436
  "epoch": 0.93,
1437
  "learning_rate": 3.7834691501746217e-06,
1438
- "logits/chosen": -2.3971574306488037,
1439
- "logits/rejected": -2.3389389514923096,
1440
- "logps/chosen": -445.2398986816406,
1441
- "logps/rejected": -555.1053466796875,
1442
- "loss": 0.0667,
1443
- "pred_label": 11815.775390625,
1444
- "rewards/accuracies": 0.706250011920929,
1445
- "rewards/chosen": -16.39553451538086,
1446
- "rewards/margins": 11.912240982055664,
1447
- "rewards/rejected": -28.30777931213379,
1448
  "step": 890,
1449
- "use_label": 2346.22509765625
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 3.2013969732246805e-06,
1454
- "logits/chosen": -2.279033660888672,
1455
- "logits/rejected": -2.161271572113037,
1456
- "logps/chosen": -450.6705627441406,
1457
- "logps/rejected": -566.3276977539062,
1458
- "loss": 0.0876,
1459
- "pred_label": 11961.5751953125,
1460
- "rewards/accuracies": 0.668749988079071,
1461
- "rewards/chosen": -17.153247833251953,
1462
- "rewards/margins": 13.627708435058594,
1463
- "rewards/rejected": -30.780956268310547,
1464
  "step": 900,
1465
- "use_label": 2360.425048828125
1466
  },
1467
  {
1468
  "epoch": 0.95,
1469
  "learning_rate": 2.6193247962747383e-06,
1470
- "logits/chosen": -2.414407253265381,
1471
- "logits/rejected": -2.305093288421631,
1472
- "logps/chosen": -450.60595703125,
1473
- "logps/rejected": -530.6917724609375,
1474
- "loss": 0.0717,
1475
- "pred_label": 12107.375,
1476
- "rewards/accuracies": 0.731249988079071,
1477
- "rewards/chosen": -15.418998718261719,
1478
- "rewards/margins": 11.336746215820312,
1479
- "rewards/rejected": -26.7557430267334,
1480
  "step": 910,
1481
- "use_label": 2374.625
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
  "learning_rate": 2.037252619324796e-06,
1486
- "logits/chosen": -2.346126079559326,
1487
- "logits/rejected": -2.2459468841552734,
1488
- "logps/chosen": -454.90740966796875,
1489
- "logps/rejected": -556.7593994140625,
1490
- "loss": 0.0626,
1491
- "pred_label": 12256.9501953125,
1492
- "rewards/accuracies": 0.699999988079071,
1493
- "rewards/chosen": -15.801968574523926,
1494
- "rewards/margins": 13.915499687194824,
1495
- "rewards/rejected": -29.717464447021484,
1496
  "step": 920,
1497
- "use_label": 2385.050048828125
1498
  },
1499
  {
1500
  "epoch": 0.97,
1501
  "learning_rate": 1.4551804423748545e-06,
1502
- "logits/chosen": -2.400489091873169,
1503
- "logits/rejected": -2.255152702331543,
1504
- "logps/chosen": -392.2051086425781,
1505
- "logps/rejected": -571.8855590820312,
1506
- "loss": 0.0663,
1507
- "pred_label": 12408.974609375,
1508
  "rewards/accuracies": 0.8125,
1509
- "rewards/chosen": -12.399200439453125,
1510
- "rewards/margins": 20.609838485717773,
1511
- "rewards/rejected": -33.00904083251953,
1512
  "step": 930,
1513
- "use_label": 2393.02490234375
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
  "learning_rate": 8.731082654249127e-07,
1518
- "logits/chosen": -2.398634195327759,
1519
- "logits/rejected": -2.28908634185791,
1520
- "logps/chosen": -459.11846923828125,
1521
- "logps/rejected": -550.9254150390625,
1522
- "loss": 0.0731,
1523
- "pred_label": 12559.150390625,
1524
- "rewards/accuracies": 0.71875,
1525
- "rewards/chosen": -17.1726016998291,
1526
- "rewards/margins": 11.231449127197266,
1527
- "rewards/rejected": -28.404048919677734,
1528
  "step": 940,
1529
- "use_label": 2402.85009765625
1530
  },
1531
  {
1532
  "epoch": 0.99,
1533
  "learning_rate": 2.910360884749709e-07,
1534
- "logits/chosen": -2.373722791671753,
1535
- "logits/rejected": -2.2480380535125732,
1536
- "logps/chosen": -450.88623046875,
1537
- "logps/rejected": -560.01220703125,
1538
- "loss": 0.069,
1539
- "pred_label": 12709.775390625,
1540
- "rewards/accuracies": 0.6875,
1541
- "rewards/chosen": -18.243160247802734,
1542
- "rewards/margins": 12.46325397491455,
1543
- "rewards/rejected": -30.7064151763916,
1544
  "step": 950,
1545
- "use_label": 2412.22509765625
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
- "eval_logits/chosen": -2.3870656490325928,
1550
- "eval_logits/rejected": -2.274824380874634,
1551
- "eval_logps/chosen": -441.0377502441406,
1552
- "eval_logps/rejected": -538.9608764648438,
1553
- "eval_loss": 0.07583338022232056,
1554
- "eval_pred_label": 13084.7841796875,
1555
- "eval_rewards/accuracies": 0.7080000042915344,
1556
- "eval_rewards/chosen": -15.678034782409668,
1557
- "eval_rewards/margins": 12.288079261779785,
1558
- "eval_rewards/rejected": -27.966114044189453,
1559
- "eval_runtime": 450.8813,
1560
- "eval_samples_per_second": 4.436,
1561
- "eval_steps_per_second": 0.277,
1562
- "eval_use_label": 2447.216064453125,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
- "train_loss": 0.12920980815488006,
1570
- "train_runtime": 25162.7962,
1571
- "train_samples_per_second": 2.43,
1572
  "train_steps_per_second": 0.038
1573
  }
1574
  ],
 
75
  {
76
  "epoch": 0.04,
77
  "learning_rate": 2.0833333333333336e-05,
78
+ "logits/chosen": -2.840946674346924,
79
+ "logits/rejected": -2.8493659496307373,
80
+ "logps/chosen": -281.32928466796875,
81
+ "logps/rejected": -277.8607482910156,
82
+ "loss": 0.6339,
83
+ "pred_label": 0.10000000149011612,
84
+ "rewards/accuracies": 0.6875,
85
+ "rewards/chosen": 0.02641097828745842,
86
+ "rewards/margins": 0.2079576551914215,
87
+ "rewards/rejected": -0.1815466731786728,
88
  "step": 40,
89
+ "use_label": 561.9000244140625
90
  },
91
  {
92
  "epoch": 0.05,
93
  "learning_rate": 2.604166666666667e-05,
94
+ "logits/chosen": -2.854478120803833,
95
+ "logits/rejected": -2.839433193206787,
96
+ "logps/chosen": -266.80517578125,
97
+ "logps/rejected": -261.9176025390625,
98
+ "loss": 0.5914,
99
+ "pred_label": 2.950000047683716,
100
+ "rewards/accuracies": 0.643750011920929,
101
+ "rewards/chosen": -0.06968289613723755,
102
+ "rewards/margins": 0.33043327927589417,
103
+ "rewards/rejected": -0.4001162648200989,
104
  "step": 50,
105
+ "use_label": 719.0499877929688
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.125e-05,
110
+ "logits/chosen": -2.817157506942749,
111
+ "logits/rejected": -2.8072521686553955,
112
+ "logps/chosen": -300.76080322265625,
113
+ "logps/rejected": -290.3218994140625,
114
+ "loss": 0.5988,
115
+ "pred_label": 11.574999809265137,
116
+ "rewards/accuracies": 0.699999988079071,
117
+ "rewards/chosen": -0.03455673158168793,
118
+ "rewards/margins": 0.3870925307273865,
119
+ "rewards/rejected": -0.4216492772102356,
120
  "step": 60,
121
+ "use_label": 870.4249877929688
122
  },
123
  {
124
  "epoch": 0.07,
125
  "learning_rate": 3.6458333333333336e-05,
126
+ "logits/chosen": -2.820021152496338,
127
+ "logits/rejected": -2.813854217529297,
128
+ "logps/chosen": -294.513671875,
129
+ "logps/rejected": -278.9858093261719,
130
+ "loss": 0.5227,
131
+ "pred_label": 25.399999618530273,
132
+ "rewards/accuracies": 0.7437499761581421,
133
+ "rewards/chosen": -0.020013216882944107,
134
+ "rewards/margins": 0.5820196866989136,
135
+ "rewards/rejected": -0.6020328998565674,
136
  "step": 70,
137
+ "use_label": 1016.5999755859375
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.166666666666667e-05,
142
+ "logits/chosen": -2.7645280361175537,
143
+ "logits/rejected": -2.7516016960144043,
144
+ "logps/chosen": -306.9416809082031,
145
+ "logps/rejected": -288.9856262207031,
146
+ "loss": 0.4536,
147
+ "pred_label": 58.599998474121094,
148
+ "rewards/accuracies": 0.75,
149
+ "rewards/chosen": -0.16363248229026794,
150
+ "rewards/margins": 0.8336677551269531,
151
+ "rewards/rejected": -0.9973002672195435,
152
  "step": 80,
153
+ "use_label": 1143.4000244140625
154
  },
155
  {
156
  "epoch": 0.09,
157
  "learning_rate": 4.6875e-05,
158
+ "logits/chosen": -2.6945321559906006,
159
+ "logits/rejected": -2.747893810272217,
160
+ "logps/chosen": -295.63934326171875,
161
+ "logps/rejected": -275.24127197265625,
162
+ "loss": 0.4735,
163
+ "pred_label": 106.5,
164
+ "rewards/accuracies": 0.731249988079071,
165
+ "rewards/chosen": 0.014383295550942421,
166
+ "rewards/margins": 0.911063551902771,
167
+ "rewards/rejected": -0.8966802358627319,
168
  "step": 90,
169
+ "use_label": 1255.5
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 4.976717112922003e-05,
174
+ "logits/chosen": -2.719611406326294,
175
+ "logits/rejected": -2.718784809112549,
176
+ "logps/chosen": -276.9577941894531,
177
+ "logps/rejected": -306.4429626464844,
178
+ "loss": 0.484,
179
+ "pred_label": 153.3249969482422,
180
+ "rewards/accuracies": 0.706250011920929,
181
+ "rewards/chosen": -0.28260549902915955,
182
+ "rewards/margins": 1.008049726486206,
183
+ "rewards/rejected": -1.290655255317688,
184
  "step": 100,
185
+ "use_label": 1368.675048828125
186
  },
187
  {
188
  "epoch": 0.12,
189
  "learning_rate": 4.918509895227008e-05,
190
+ "logits/chosen": -2.7281861305236816,
191
+ "logits/rejected": -2.687361478805542,
192
+ "logps/chosen": -269.6018981933594,
193
+ "logps/rejected": -263.89166259765625,
194
+ "loss": 0.4361,
195
+ "pred_label": 213.60000610351562,
196
+ "rewards/accuracies": 0.6312500238418579,
197
+ "rewards/chosen": -0.4927287697792053,
198
+ "rewards/margins": 0.9790979623794556,
199
+ "rewards/rejected": -1.4718266725540161,
200
  "step": 110,
201
+ "use_label": 1468.4000244140625
202
  },
203
  {
204
  "epoch": 0.13,
205
  "learning_rate": 4.860302677532014e-05,
206
+ "logits/chosen": -2.7501158714294434,
207
+ "logits/rejected": -2.742772340774536,
208
+ "logps/chosen": -281.5338439941406,
209
+ "logps/rejected": -259.219970703125,
210
+ "loss": 0.4674,
211
+ "pred_label": 270.0249938964844,
212
+ "rewards/accuracies": 0.6625000238418579,
213
+ "rewards/chosen": -0.20209825038909912,
214
+ "rewards/margins": 0.9515292048454285,
215
+ "rewards/rejected": -1.1536273956298828,
216
  "step": 120,
217
+ "use_label": 1571.9749755859375
218
  },
219
  {
220
  "epoch": 0.14,
221
  "learning_rate": 4.80209545983702e-05,
222
+ "logits/chosen": -2.8077096939086914,
223
+ "logits/rejected": -2.7647416591644287,
224
+ "logps/chosen": -308.7791748046875,
225
+ "logps/rejected": -282.69976806640625,
226
+ "loss": 0.4803,
227
+ "pred_label": 317.4750061035156,
228
+ "rewards/accuracies": 0.6812499761581421,
229
+ "rewards/chosen": -0.15117435157299042,
230
+ "rewards/margins": 0.8076679110527039,
231
+ "rewards/rejected": -0.958842396736145,
232
  "step": 130,
233
+ "use_label": 1684.5250244140625
234
  },
235
  {
236
  "epoch": 0.15,
237
  "learning_rate": 4.743888242142026e-05,
238
+ "logits/chosen": -2.814915657043457,
239
+ "logits/rejected": -2.8093409538269043,
240
+ "logps/chosen": -261.60418701171875,
241
+ "logps/rejected": -249.7925567626953,
242
+ "loss": 0.3944,
243
+ "pred_label": 370.67498779296875,
244
+ "rewards/accuracies": 0.706250011920929,
245
+ "rewards/chosen": -0.34973591566085815,
246
+ "rewards/margins": 1.1276283264160156,
247
+ "rewards/rejected": -1.4773643016815186,
248
  "step": 140,
249
+ "use_label": 1791.324951171875
250
  },
251
  {
252
  "epoch": 0.16,
253
  "learning_rate": 4.685681024447032e-05,
254
+ "logits/chosen": -2.71696138381958,
255
+ "logits/rejected": -2.740691661834717,
256
+ "logps/chosen": -302.32843017578125,
257
+ "logps/rejected": -298.6430358886719,
258
+ "loss": 0.3948,
259
+ "pred_label": 430.0,
260
  "rewards/accuracies": 0.612500011920929,
261
+ "rewards/chosen": -0.7442375421524048,
262
+ "rewards/margins": 0.9453207850456238,
263
+ "rewards/rejected": -1.6895582675933838,
264
  "step": 150,
265
+ "use_label": 1892.0
266
  },
267
  {
268
  "epoch": 0.17,
269
  "learning_rate": 4.6274738067520374e-05,
270
+ "logits/chosen": -2.7621922492980957,
271
+ "logits/rejected": -2.707430601119995,
272
+ "logps/chosen": -264.4729919433594,
273
+ "logps/rejected": -283.64837646484375,
274
+ "loss": 0.4037,
275
+ "pred_label": 494.9750061035156,
276
+ "rewards/accuracies": 0.71875,
277
+ "rewards/chosen": -0.20909884572029114,
278
+ "rewards/margins": 1.2514889240264893,
279
+ "rewards/rejected": -1.4605878591537476,
280
  "step": 160,
281
+ "use_label": 1987.0250244140625
282
  },
283
  {
284
  "epoch": 0.18,
285
  "learning_rate": 4.5692665890570435e-05,
286
+ "logits/chosen": -2.800288438796997,
287
+ "logits/rejected": -2.8005611896514893,
288
+ "logps/chosen": -298.9527282714844,
289
+ "logps/rejected": -264.09014892578125,
290
+ "loss": 0.3923,
291
+ "pred_label": 555.2249755859375,
292
+ "rewards/accuracies": 0.6625000238418579,
293
+ "rewards/chosen": -0.24765756726264954,
294
+ "rewards/margins": 1.1968435049057007,
295
+ "rewards/rejected": -1.4445011615753174,
296
  "step": 170,
297
+ "use_label": 2086.77490234375
298
  },
299
  {
300
  "epoch": 0.19,
301
  "learning_rate": 4.511059371362049e-05,
302
+ "logits/chosen": -2.7035715579986572,
303
+ "logits/rejected": -2.6763572692871094,
304
+ "logps/chosen": -261.8144836425781,
305
+ "logps/rejected": -294.1943054199219,
306
+ "loss": 0.3547,
307
+ "pred_label": 639.875,
308
+ "rewards/accuracies": 0.731249988079071,
309
+ "rewards/chosen": -0.588528573513031,
310
+ "rewards/margins": 1.7626692056655884,
311
+ "rewards/rejected": -2.3511977195739746,
312
  "step": 180,
313
+ "use_label": 2162.125
314
  },
315
  {
316
  "epoch": 0.2,
317
  "learning_rate": 4.452852153667055e-05,
318
+ "logits/chosen": -2.788900375366211,
319
+ "logits/rejected": -2.745344638824463,
320
+ "logps/chosen": -311.311767578125,
321
+ "logps/rejected": -287.326416015625,
322
+ "loss": 0.3357,
323
+ "pred_label": 725.2999877929688,
324
+ "rewards/accuracies": 0.793749988079071,
325
+ "rewards/chosen": -0.4055810868740082,
326
+ "rewards/margins": 2.22284197807312,
327
+ "rewards/rejected": -2.6284232139587402,
328
  "step": 190,
329
+ "use_label": 2236.699951171875
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 4.394644935972061e-05,
334
+ "logits/chosen": -2.789668560028076,
335
+ "logits/rejected": -2.7655420303344727,
336
+ "logps/chosen": -274.904052734375,
337
+ "logps/rejected": -299.71356201171875,
338
+ "loss": 0.3242,
339
+ "pred_label": 815.0,
340
+ "rewards/accuracies": 0.71875,
341
+ "rewards/chosen": -1.3575278520584106,
342
+ "rewards/margins": 1.7897872924804688,
343
+ "rewards/rejected": -3.147315263748169,
344
  "step": 200,
345
+ "use_label": 2307.0
346
  },
347
  {
348
  "epoch": 0.22,
349
  "learning_rate": 4.336437718277067e-05,
350
+ "logits/chosen": -2.7797892093658447,
351
+ "logits/rejected": -2.796938419342041,
352
+ "logps/chosen": -310.7392883300781,
353
+ "logps/rejected": -278.1353454589844,
354
+ "loss": 0.3789,
355
+ "pred_label": 897.2750244140625,
356
+ "rewards/accuracies": 0.75,
357
+ "rewards/chosen": -0.9491372108459473,
358
+ "rewards/margins": 1.4454370737075806,
359
+ "rewards/rejected": -2.394573926925659,
360
  "step": 210,
361
+ "use_label": 2384.72509765625
362
  },
363
  {
364
  "epoch": 0.23,
365
  "learning_rate": 4.278230500582072e-05,
366
+ "logits/chosen": -2.729732036590576,
367
+ "logits/rejected": -2.7413415908813477,
368
+ "logps/chosen": -283.9934387207031,
369
+ "logps/rejected": -297.1932678222656,
370
+ "loss": 0.3046,
371
+ "pred_label": 983.3499755859375,
372
+ "rewards/accuracies": 0.75,
373
+ "rewards/chosen": -0.9001734852790833,
374
+ "rewards/margins": 1.7534692287445068,
375
+ "rewards/rejected": -2.6536426544189453,
376
  "step": 220,
377
+ "use_label": 2458.64990234375
378
  },
379
  {
380
  "epoch": 0.24,
381
  "learning_rate": 4.220023282887078e-05,
382
+ "logits/chosen": -2.7093918323516846,
383
+ "logits/rejected": -2.724216938018799,
384
+ "logps/chosen": -320.80218505859375,
385
+ "logps/rejected": -295.5621643066406,
386
+ "loss": 0.2963,
387
+ "pred_label": 1077.550048828125,
388
+ "rewards/accuracies": 0.7437499761581421,
389
+ "rewards/chosen": -1.2629055976867676,
390
+ "rewards/margins": 1.8823902606964111,
391
+ "rewards/rejected": -3.1452958583831787,
392
  "step": 230,
393
+ "use_label": 2524.449951171875
394
  },
395
  {
396
  "epoch": 0.25,
397
  "learning_rate": 4.161816065192084e-05,
398
+ "logits/chosen": -2.707616090774536,
399
+ "logits/rejected": -2.6788878440856934,
400
+ "logps/chosen": -300.3539123535156,
401
+ "logps/rejected": -273.60333251953125,
402
+ "loss": 0.2668,
403
+ "pred_label": 1176.5250244140625,
404
+ "rewards/accuracies": 0.6875,
405
+ "rewards/chosen": -2.5776500701904297,
406
+ "rewards/margins": 2.4807069301605225,
407
+ "rewards/rejected": -5.058356761932373,
408
  "step": 240,
409
+ "use_label": 2585.47509765625
410
  },
411
  {
412
  "epoch": 0.26,
413
  "learning_rate": 4.10360884749709e-05,
414
+ "logits/chosen": -2.7368381023406982,
415
+ "logits/rejected": -2.722391366958618,
416
+ "logps/chosen": -316.1106262207031,
417
+ "logps/rejected": -321.246337890625,
418
+ "loss": 0.3166,
419
+ "pred_label": 1270.574951171875,
420
+ "rewards/accuracies": 0.706250011920929,
421
+ "rewards/chosen": -2.5470499992370605,
422
+ "rewards/margins": 2.635700225830078,
423
+ "rewards/rejected": -5.182750225067139,
424
  "step": 250,
425
+ "use_label": 2651.425048828125
426
  },
427
  {
428
  "epoch": 0.27,
429
  "learning_rate": 4.045401629802096e-05,
430
+ "logits/chosen": -2.7675890922546387,
431
+ "logits/rejected": -2.7713561058044434,
432
+ "logps/chosen": -266.34136962890625,
433
+ "logps/rejected": -286.3191833496094,
434
+ "loss": 0.3408,
435
+ "pred_label": 1354.6500244140625,
436
+ "rewards/accuracies": 0.7250000238418579,
437
+ "rewards/chosen": -1.7731956243515015,
438
+ "rewards/margins": 1.8298133611679077,
439
+ "rewards/rejected": -3.603008985519409,
440
  "step": 260,
441
+ "use_label": 2727.35009765625
442
  },
443
  {
444
  "epoch": 0.28,
445
  "learning_rate": 3.9871944121071014e-05,
446
+ "logits/chosen": -2.764836311340332,
447
+ "logits/rejected": -2.7679736614227295,
448
+ "logps/chosen": -306.4355773925781,
449
+ "logps/rejected": -316.5330505371094,
450
+ "loss": 0.3,
451
+ "pred_label": 1444.324951171875,
452
+ "rewards/accuracies": 0.793749988079071,
453
+ "rewards/chosen": -1.4564238786697388,
454
+ "rewards/margins": 2.7216858863830566,
455
+ "rewards/rejected": -4.178110122680664,
456
  "step": 270,
457
+ "use_label": 2797.675048828125
458
  },
459
  {
460
  "epoch": 0.29,
461
  "learning_rate": 3.928987194412107e-05,
462
+ "logits/chosen": -2.81805682182312,
463
+ "logits/rejected": -2.8056905269622803,
464
+ "logps/chosen": -309.92047119140625,
465
+ "logps/rejected": -300.94268798828125,
466
+ "loss": 0.3011,
467
+ "pred_label": 1543.4749755859375,
468
+ "rewards/accuracies": 0.75,
469
+ "rewards/chosen": -1.595649003982544,
470
+ "rewards/margins": 2.275697708129883,
471
+ "rewards/rejected": -3.8713467121124268,
472
  "step": 280,
473
+ "use_label": 2858.52490234375
474
  },
475
  {
476
  "epoch": 0.3,
477
  "learning_rate": 3.870779976717113e-05,
478
+ "logits/chosen": -2.786515235900879,
479
+ "logits/rejected": -2.7539620399475098,
480
+ "logps/chosen": -312.62060546875,
481
+ "logps/rejected": -268.4746398925781,
482
+ "loss": 0.3045,
483
+ "pred_label": 1641.2249755859375,
484
+ "rewards/accuracies": 0.7124999761581421,
485
+ "rewards/chosen": -1.9698221683502197,
486
+ "rewards/margins": 1.8559925556182861,
487
+ "rewards/rejected": -3.825814723968506,
488
  "step": 290,
489
+ "use_label": 2920.77490234375
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 3.812572759022119e-05,
494
+ "logits/chosen": -2.79560923576355,
495
+ "logits/rejected": -2.744600296020508,
496
+ "logps/chosen": -279.0300598144531,
497
+ "logps/rejected": -275.5624084472656,
498
+ "loss": 0.3083,
499
+ "pred_label": 1731.300048828125,
500
+ "rewards/accuracies": 0.706250011920929,
501
+ "rewards/chosen": -1.5351210832595825,
502
+ "rewards/margins": 2.036700963973999,
503
+ "rewards/rejected": -3.571821928024292,
504
  "step": 300,
505
+ "use_label": 2990.699951171875
506
  },
507
  {
508
  "epoch": 0.32,
509
  "learning_rate": 3.7543655413271246e-05,
510
+ "logits/chosen": -2.7574381828308105,
511
+ "logits/rejected": -2.6999497413635254,
512
+ "logps/chosen": -285.3556213378906,
513
+ "logps/rejected": -344.51849365234375,
514
+ "loss": 0.2794,
515
+ "pred_label": 1830.0,
516
+ "rewards/accuracies": 0.78125,
517
+ "rewards/chosen": -1.8423988819122314,
518
+ "rewards/margins": 3.7551066875457764,
519
+ "rewards/rejected": -5.597506046295166,
520
  "step": 310,
521
+ "use_label": 3052.0
522
  },
523
  {
524
  "epoch": 0.33,
525
  "learning_rate": 3.696158323632131e-05,
526
+ "logits/chosen": -2.7264797687530518,
527
+ "logits/rejected": -2.687101125717163,
528
+ "logps/chosen": -318.2635192871094,
529
+ "logps/rejected": -331.156005859375,
530
+ "loss": 0.2657,
531
+ "pred_label": 1930.050048828125,
532
+ "rewards/accuracies": 0.699999988079071,
533
+ "rewards/chosen": -2.228361129760742,
534
+ "rewards/margins": 2.806436777114868,
535
+ "rewards/rejected": -5.034797668457031,
536
  "step": 320,
537
+ "use_label": 3111.949951171875
538
  },
539
  {
540
  "epoch": 0.35,
541
  "learning_rate": 3.637951105937136e-05,
542
+ "logits/chosen": -2.7709312438964844,
543
+ "logits/rejected": -2.7176883220672607,
544
+ "logps/chosen": -289.0472717285156,
545
+ "logps/rejected": -277.4143371582031,
546
+ "loss": 0.2868,
547
+ "pred_label": 2034.1500244140625,
548
+ "rewards/accuracies": 0.6875,
549
+ "rewards/chosen": -1.8098747730255127,
550
+ "rewards/margins": 2.50335955619812,
551
+ "rewards/rejected": -4.313233852386475,
552
  "step": 330,
553
+ "use_label": 3167.85009765625
554
  },
555
  {
556
  "epoch": 0.36,
557
  "learning_rate": 3.579743888242142e-05,
558
+ "logits/chosen": -2.69682240486145,
559
+ "logits/rejected": -2.7000985145568848,
560
+ "logps/chosen": -308.01141357421875,
561
+ "logps/rejected": -279.4844665527344,
562
+ "loss": 0.3048,
563
+ "pred_label": 2128.949951171875,
564
+ "rewards/accuracies": 0.675000011920929,
565
+ "rewards/chosen": -1.883141279220581,
566
+ "rewards/margins": 1.98026442527771,
567
+ "rewards/rejected": -3.863405704498291,
568
  "step": 340,
569
+ "use_label": 3233.050048828125
570
  },
571
  {
572
  "epoch": 0.37,
573
  "learning_rate": 3.5215366705471484e-05,
574
+ "logits/chosen": -2.663508176803589,
575
+ "logits/rejected": -2.6349689960479736,
576
+ "logps/chosen": -314.67962646484375,
577
+ "logps/rejected": -331.1473693847656,
578
+ "loss": 0.2869,
579
+ "pred_label": 2234.77490234375,
580
+ "rewards/accuracies": 0.8187500238418579,
581
+ "rewards/chosen": -1.7484877109527588,
582
+ "rewards/margins": 3.6000218391418457,
583
+ "rewards/rejected": -5.348509788513184,
584
  "step": 350,
585
+ "use_label": 3287.22509765625
586
  },
587
  {
588
  "epoch": 0.38,
589
  "learning_rate": 3.463329452852154e-05,
590
+ "logits/chosen": -2.7377424240112305,
591
+ "logits/rejected": -2.729678153991699,
592
+ "logps/chosen": -297.26873779296875,
593
+ "logps/rejected": -313.16888427734375,
594
+ "loss": 0.2666,
595
+ "pred_label": 2342.22509765625,
596
+ "rewards/accuracies": 0.762499988079071,
597
+ "rewards/chosen": -2.4474658966064453,
598
+ "rewards/margins": 2.349332094192505,
599
+ "rewards/rejected": -4.796797752380371,
600
  "step": 360,
601
+ "use_label": 3339.77490234375
602
  },
603
  {
604
  "epoch": 0.39,
605
  "learning_rate": 3.40512223515716e-05,
606
+ "logits/chosen": -2.6696877479553223,
607
+ "logits/rejected": -2.5973799228668213,
608
+ "logps/chosen": -310.88897705078125,
609
+ "logps/rejected": -354.97222900390625,
610
+ "loss": 0.2556,
611
+ "pred_label": 2450.97509765625,
612
+ "rewards/accuracies": 0.7875000238418579,
613
+ "rewards/chosen": -3.468627452850342,
614
+ "rewards/margins": 4.456360816955566,
615
+ "rewards/rejected": -7.924988746643066,
616
  "step": 370,
617
+ "use_label": 3391.02490234375
618
  },
619
  {
620
  "epoch": 0.4,
621
  "learning_rate": 3.3469150174621654e-05,
622
+ "logits/chosen": -2.7021727561950684,
623
+ "logits/rejected": -2.679241418838501,
624
+ "logps/chosen": -320.2778015136719,
625
+ "logps/rejected": -327.36431884765625,
626
+ "loss": 0.2394,
627
+ "pred_label": 2560.375,
628
+ "rewards/accuracies": 0.71875,
629
+ "rewards/chosen": -3.5047569274902344,
630
+ "rewards/margins": 3.173809289932251,
631
+ "rewards/rejected": -6.678565979003906,
632
  "step": 380,
633
+ "use_label": 3441.625
634
  },
635
  {
636
  "epoch": 0.41,
637
  "learning_rate": 3.288707799767171e-05,
638
+ "logits/chosen": -2.6417362689971924,
639
+ "logits/rejected": -2.6089835166931152,
640
+ "logps/chosen": -295.4016418457031,
641
+ "logps/rejected": -288.3475341796875,
642
+ "loss": 0.2799,
643
+ "pred_label": 2666.675048828125,
644
+ "rewards/accuracies": 0.737500011920929,
645
+ "rewards/chosen": -1.2609713077545166,
646
+ "rewards/margins": 3.1455700397491455,
647
+ "rewards/rejected": -4.406540870666504,
648
  "step": 390,
649
+ "use_label": 3495.324951171875
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 3.2305005820721776e-05,
654
+ "logits/chosen": -2.774970054626465,
655
+ "logits/rejected": -2.757179021835327,
656
+ "logps/chosen": -317.8477478027344,
657
+ "logps/rejected": -290.2752685546875,
658
+ "loss": 0.2669,
659
+ "pred_label": 2758.64990234375,
660
+ "rewards/accuracies": 0.71875,
661
+ "rewards/chosen": -1.0796750783920288,
662
+ "rewards/margins": 2.0139389038085938,
663
+ "rewards/rejected": -3.093614101409912,
664
  "step": 400,
665
+ "use_label": 3563.35009765625
666
  },
667
  {
668
  "epoch": 0.43,
669
  "learning_rate": 3.172293364377183e-05,
670
+ "logits/chosen": -2.777430295944214,
671
+ "logits/rejected": -2.7574374675750732,
672
+ "logps/chosen": -312.574951171875,
673
+ "logps/rejected": -291.488037109375,
674
+ "loss": 0.296,
675
+ "pred_label": 2843.10009765625,
676
+ "rewards/accuracies": 0.668749988079071,
677
+ "rewards/chosen": -1.4477269649505615,
678
+ "rewards/margins": 1.8462398052215576,
679
+ "rewards/rejected": -3.2939670085906982,
680
  "step": 410,
681
+ "use_label": 3638.89990234375
682
  },
683
  {
684
  "epoch": 0.44,
685
  "learning_rate": 3.1140861466821885e-05,
686
+ "logits/chosen": -2.718291759490967,
687
+ "logits/rejected": -2.688828706741333,
688
+ "logps/chosen": -254.98593139648438,
689
+ "logps/rejected": -283.18560791015625,
690
+ "loss": 0.3046,
691
+ "pred_label": 2935.14990234375,
692
+ "rewards/accuracies": 0.6937500238418579,
693
+ "rewards/chosen": -1.440553069114685,
694
+ "rewards/margins": 1.9665199518203735,
695
+ "rewards/rejected": -3.4070727825164795,
696
  "step": 420,
697
+ "use_label": 3706.85009765625
698
  },
699
  {
700
  "epoch": 0.45,
701
  "learning_rate": 3.055878928987195e-05,
702
+ "logits/chosen": -2.7530155181884766,
703
+ "logits/rejected": -2.7446746826171875,
704
+ "logps/chosen": -288.42120361328125,
705
+ "logps/rejected": -287.4293518066406,
706
+ "loss": 0.3446,
707
+ "pred_label": 3031.39990234375,
708
+ "rewards/accuracies": 0.6875,
709
+ "rewards/chosen": -1.0520861148834229,
710
+ "rewards/margins": 1.7391481399536133,
711
+ "rewards/rejected": -2.791234254837036,
712
  "step": 430,
713
+ "use_label": 3770.60009765625
714
  },
715
  {
716
  "epoch": 0.46,
717
  "learning_rate": 2.9976717112922005e-05,
718
+ "logits/chosen": -2.8176846504211426,
719
+ "logits/rejected": -2.8231449127197266,
720
+ "logps/chosen": -296.26837158203125,
721
+ "logps/rejected": -284.00335693359375,
722
+ "loss": 0.2945,
723
+ "pred_label": 3121.39990234375,
724
+ "rewards/accuracies": 0.71875,
725
+ "rewards/chosen": -1.2998032569885254,
726
+ "rewards/margins": 1.8008592128753662,
727
+ "rewards/rejected": -3.1006627082824707,
728
  "step": 440,
729
+ "use_label": 3840.60009765625
730
  },
731
  {
732
  "epoch": 0.47,
733
  "learning_rate": 2.939464493597206e-05,
734
+ "logits/chosen": -2.6171724796295166,
735
+ "logits/rejected": -2.6079916954040527,
736
+ "logps/chosen": -248.51025390625,
737
+ "logps/rejected": -273.4137268066406,
738
+ "loss": 0.287,
739
+ "pred_label": 3211.925048828125,
740
+ "rewards/accuracies": 0.7250000238418579,
741
+ "rewards/chosen": -1.8791544437408447,
742
+ "rewards/margins": 2.497044086456299,
743
+ "rewards/rejected": -4.3761982917785645,
744
  "step": 450,
745
+ "use_label": 3910.074951171875
746
  },
747
  {
748
  "epoch": 0.48,
749
  "learning_rate": 2.881257275902212e-05,
750
+ "logits/chosen": -2.815080165863037,
751
+ "logits/rejected": -2.8055264949798584,
752
+ "logps/chosen": -318.76666259765625,
753
+ "logps/rejected": -305.4552917480469,
754
+ "loss": 0.3106,
755
+ "pred_label": 3311.449951171875,
756
+ "rewards/accuracies": 0.6937500238418579,
757
+ "rewards/chosen": -1.6964454650878906,
758
+ "rewards/margins": 1.949159026145935,
759
+ "rewards/rejected": -3.645604372024536,
760
  "step": 460,
761
+ "use_label": 3970.550048828125
762
  },
763
  {
764
  "epoch": 0.49,
765
  "learning_rate": 2.8230500582072178e-05,
766
+ "logits/chosen": -2.736795425415039,
767
+ "logits/rejected": -2.70324444770813,
768
+ "logps/chosen": -289.38824462890625,
769
+ "logps/rejected": -302.501708984375,
770
+ "loss": 0.2652,
771
+ "pred_label": 3412.550048828125,
772
+ "rewards/accuracies": 0.6875,
773
+ "rewards/chosen": -2.2287776470184326,
774
+ "rewards/margins": 2.337141513824463,
775
+ "rewards/rejected": -4.565918922424316,
776
  "step": 470,
777
+ "use_label": 4029.449951171875
778
  },
779
  {
780
  "epoch": 0.5,
781
  "learning_rate": 2.7648428405122233e-05,
782
+ "logits/chosen": -2.6519317626953125,
783
+ "logits/rejected": -2.616342067718506,
784
+ "logps/chosen": -274.76422119140625,
785
+ "logps/rejected": -303.0647277832031,
786
+ "loss": 0.3052,
787
+ "pred_label": 3516.85009765625,
788
+ "rewards/accuracies": 0.6812499761581421,
789
+ "rewards/chosen": -2.918997287750244,
790
+ "rewards/margins": 2.5355329513549805,
791
+ "rewards/rejected": -5.454530239105225,
792
  "step": 480,
793
+ "use_label": 4085.14990234375
794
  },
795
  {
796
  "epoch": 0.51,
797
  "learning_rate": 2.7066356228172297e-05,
798
+ "logits/chosen": -2.6925861835479736,
799
+ "logits/rejected": -2.67038631439209,
800
+ "logps/chosen": -289.875,
801
+ "logps/rejected": -282.19903564453125,
802
+ "loss": 0.2758,
803
+ "pred_label": 3613.125,
804
+ "rewards/accuracies": 0.7250000238418579,
805
+ "rewards/chosen": -2.284597873687744,
806
+ "rewards/margins": 2.195889711380005,
807
+ "rewards/rejected": -4.48048734664917,
808
  "step": 490,
809
+ "use_label": 4148.875
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 2.6484284051222352e-05,
814
+ "logits/chosen": -2.7040975093841553,
815
+ "logits/rejected": -2.6474618911743164,
816
+ "logps/chosen": -308.2983703613281,
817
+ "logps/rejected": -305.19525146484375,
818
+ "loss": 0.2732,
819
+ "pred_label": 3721.64990234375,
820
+ "rewards/accuracies": 0.7437499761581421,
821
+ "rewards/chosen": -1.8841865062713623,
822
+ "rewards/margins": 2.809114456176758,
823
+ "rewards/rejected": -4.693300724029541,
824
  "step": 500,
825
+ "use_label": 4200.35009765625
826
  },
827
  {
828
  "epoch": 0.53,
829
  "learning_rate": 2.590221187427241e-05,
830
+ "logits/chosen": -2.7423815727233887,
831
+ "logits/rejected": -2.699392795562744,
832
+ "logps/chosen": -296.5220642089844,
833
+ "logps/rejected": -285.8718566894531,
834
+ "loss": 0.2433,
835
+ "pred_label": 3824.625,
836
+ "rewards/accuracies": 0.7749999761581421,
837
+ "rewards/chosen": -2.1184868812561035,
838
+ "rewards/margins": 3.1191751956939697,
839
+ "rewards/rejected": -5.237661361694336,
840
  "step": 510,
841
+ "use_label": 4257.375
842
  },
843
  {
844
  "epoch": 0.54,
845
  "learning_rate": 2.532013969732247e-05,
846
+ "logits/chosen": -2.7525784969329834,
847
+ "logits/rejected": -2.727067708969116,
848
+ "logps/chosen": -304.08343505859375,
849
+ "logps/rejected": -308.287841796875,
850
+ "loss": 0.2989,
851
+ "pred_label": 3925.175048828125,
852
+ "rewards/accuracies": 0.706250011920929,
853
+ "rewards/chosen": -1.2129416465759277,
854
+ "rewards/margins": 2.644366502761841,
855
+ "rewards/rejected": -3.8573079109191895,
856
  "step": 520,
857
+ "use_label": 4316.8251953125
858
  },
859
  {
860
  "epoch": 0.55,
861
  "learning_rate": 2.4738067520372525e-05,
862
+ "logits/chosen": -2.759995937347412,
863
+ "logits/rejected": -2.6951136589050293,
864
+ "logps/chosen": -314.9224548339844,
865
+ "logps/rejected": -335.85101318359375,
866
+ "loss": 0.2862,
867
+ "pred_label": 4020.449951171875,
868
+ "rewards/accuracies": 0.706250011920929,
869
+ "rewards/chosen": -1.4606602191925049,
870
+ "rewards/margins": 2.726986885070801,
871
+ "rewards/rejected": -4.187647342681885,
872
  "step": 530,
873
+ "use_label": 4381.5498046875
874
  },
875
  {
876
  "epoch": 0.57,
877
  "learning_rate": 2.4155995343422587e-05,
878
+ "logits/chosen": -2.7623682022094727,
879
+ "logits/rejected": -2.743201732635498,
880
+ "logps/chosen": -307.76995849609375,
881
+ "logps/rejected": -319.3795471191406,
882
+ "loss": 0.2676,
883
+ "pred_label": 4129.97509765625,
884
+ "rewards/accuracies": 0.78125,
885
+ "rewards/chosen": -1.9233381748199463,
886
+ "rewards/margins": 3.4177041053771973,
887
+ "rewards/rejected": -5.3410420417785645,
888
  "step": 540,
889
+ "use_label": 4432.02490234375
890
  },
891
  {
892
  "epoch": 0.58,
893
  "learning_rate": 2.3573923166472644e-05,
894
+ "logits/chosen": -2.766664981842041,
895
+ "logits/rejected": -2.727250576019287,
896
+ "logps/chosen": -307.920166015625,
897
+ "logps/rejected": -313.8031311035156,
898
+ "loss": 0.2619,
899
+ "pred_label": 4236.1748046875,
900
+ "rewards/accuracies": 0.71875,
901
+ "rewards/chosen": -1.871700644493103,
902
+ "rewards/margins": 3.314380645751953,
903
+ "rewards/rejected": -5.186081886291504,
904
  "step": 550,
905
+ "use_label": 4485.8251953125
906
  },
907
  {
908
  "epoch": 0.59,
909
  "learning_rate": 2.2991850989522702e-05,
910
+ "logits/chosen": -2.747933864593506,
911
+ "logits/rejected": -2.743129253387451,
912
+ "logps/chosen": -296.66705322265625,
913
+ "logps/rejected": -261.8858947753906,
914
+ "loss": 0.281,
915
+ "pred_label": 4334.72509765625,
916
+ "rewards/accuracies": 0.75,
917
+ "rewards/chosen": -2.2262706756591797,
918
+ "rewards/margins": 2.213707685470581,
919
+ "rewards/rejected": -4.43997859954834,
920
  "step": 560,
921
+ "use_label": 4547.27490234375
922
  },
923
  {
924
  "epoch": 0.6,
925
  "learning_rate": 2.240977881257276e-05,
926
+ "logits/chosen": -2.7517714500427246,
927
+ "logits/rejected": -2.7313544750213623,
928
+ "logps/chosen": -288.4844970703125,
929
+ "logps/rejected": -302.5806579589844,
930
+ "loss": 0.2905,
931
+ "pred_label": 4442.875,
932
  "rewards/accuracies": 0.706250011920929,
933
+ "rewards/chosen": -1.8995840549468994,
934
+ "rewards/margins": 2.1551976203918457,
935
+ "rewards/rejected": -4.054781913757324,
936
  "step": 570,
937
+ "use_label": 4599.125
938
  },
939
  {
940
  "epoch": 0.61,
941
  "learning_rate": 2.1827706635622818e-05,
942
+ "logits/chosen": -2.7617430686950684,
943
+ "logits/rejected": -2.73646879196167,
944
+ "logps/chosen": -295.9949645996094,
945
+ "logps/rejected": -310.08331298828125,
946
+ "loss": 0.2821,
947
+ "pred_label": 4553.9501953125,
948
+ "rewards/accuracies": 0.768750011920929,
949
+ "rewards/chosen": -1.7886556386947632,
950
+ "rewards/margins": 2.772958993911743,
951
+ "rewards/rejected": -4.561614513397217,
952
  "step": 580,
953
+ "use_label": 4648.0498046875
954
  },
955
  {
956
  "epoch": 0.62,
957
  "learning_rate": 2.124563445867288e-05,
958
+ "logits/chosen": -2.6858975887298584,
959
+ "logits/rejected": -2.6842644214630127,
960
+ "logps/chosen": -292.4759216308594,
961
+ "logps/rejected": -323.04754638671875,
962
+ "loss": 0.2683,
963
+ "pred_label": 4648.1748046875,
964
+ "rewards/accuracies": 0.6875,
965
+ "rewards/chosen": -2.0878047943115234,
966
+ "rewards/margins": 2.138942241668701,
967
+ "rewards/rejected": -4.226747035980225,
968
  "step": 590,
969
+ "use_label": 4713.8251953125
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 2.0663562281722934e-05,
974
+ "logits/chosen": -2.755965232849121,
975
+ "logits/rejected": -2.7383854389190674,
976
+ "logps/chosen": -327.86962890625,
977
+ "logps/rejected": -351.33074951171875,
978
+ "loss": 0.2585,
979
+ "pred_label": 4749.9501953125,
980
+ "rewards/accuracies": 0.7562500238418579,
981
+ "rewards/chosen": -1.4506783485412598,
982
+ "rewards/margins": 3.4063594341278076,
983
+ "rewards/rejected": -4.8570380210876465,
984
  "step": 600,
985
+ "use_label": 4772.0498046875
986
  },
987
  {
988
  "epoch": 0.64,
989
  "learning_rate": 2.0081490104772992e-05,
990
+ "logits/chosen": -2.7139110565185547,
991
+ "logits/rejected": -2.7096176147460938,
992
+ "logps/chosen": -322.1976013183594,
993
+ "logps/rejected": -281.54266357421875,
994
+ "loss": 0.267,
995
+ "pred_label": 4857.875,
996
+ "rewards/accuracies": 0.706250011920929,
997
+ "rewards/chosen": -1.6288321018218994,
998
+ "rewards/margins": 2.5627994537353516,
999
+ "rewards/rejected": -4.19163179397583,
1000
  "step": 610,
1001
+ "use_label": 4824.125
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
  "learning_rate": 1.9499417927823053e-05,
1006
+ "logits/chosen": -2.733909845352173,
1007
+ "logits/rejected": -2.6813321113586426,
1008
+ "logps/chosen": -296.90032958984375,
1009
+ "logps/rejected": -298.9272766113281,
1010
+ "loss": 0.2764,
1011
+ "pred_label": 4962.77490234375,
1012
+ "rewards/accuracies": 0.768750011920929,
1013
+ "rewards/chosen": -1.691881775856018,
1014
+ "rewards/margins": 2.9819297790527344,
1015
+ "rewards/rejected": -4.673811912536621,
1016
  "step": 620,
1017
+ "use_label": 4879.22509765625
1018
  },
1019
  {
1020
  "epoch": 0.66,
1021
  "learning_rate": 1.8917345750873107e-05,
1022
+ "logits/chosen": -2.7139275074005127,
1023
+ "logits/rejected": -2.7132036685943604,
1024
+ "logps/chosen": -297.9289855957031,
1025
+ "logps/rejected": -306.44940185546875,
1026
+ "loss": 0.2715,
1027
+ "pred_label": 5066.4501953125,
1028
+ "rewards/accuracies": 0.7749999761581421,
1029
+ "rewards/chosen": -1.2809231281280518,
1030
+ "rewards/margins": 3.2467029094696045,
1031
+ "rewards/rejected": -4.527626991271973,
1032
  "step": 630,
1033
+ "use_label": 4935.5498046875
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
  "learning_rate": 1.833527357392317e-05,
1038
+ "logits/chosen": -2.7025070190429688,
1039
+ "logits/rejected": -2.669739246368408,
1040
+ "logps/chosen": -266.36798095703125,
1041
+ "logps/rejected": -293.29669189453125,
1042
+ "loss": 0.2907,
1043
+ "pred_label": 5163.5,
1044
+ "rewards/accuracies": 0.768750011920929,
1045
+ "rewards/chosen": -1.3176727294921875,
1046
+ "rewards/margins": 3.1546790599823,
1047
+ "rewards/rejected": -4.472352027893066,
1048
  "step": 640,
1049
+ "use_label": 4998.5
1050
  },
1051
  {
1052
  "epoch": 0.68,
1053
  "learning_rate": 1.7753201396973227e-05,
1054
+ "logits/chosen": -2.721750497817993,
1055
+ "logits/rejected": -2.6910438537597656,
1056
+ "logps/chosen": -316.4061279296875,
1057
+ "logps/rejected": -322.0241394042969,
1058
+ "loss": 0.2773,
1059
+ "pred_label": 5267.2001953125,
1060
+ "rewards/accuracies": 0.78125,
1061
+ "rewards/chosen": -1.4457476139068604,
1062
+ "rewards/margins": 2.958343982696533,
1063
+ "rewards/rejected": -4.404091835021973,
1064
  "step": 650,
1065
+ "use_label": 5054.7998046875
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
  "learning_rate": 1.717112922002328e-05,
1070
+ "logits/chosen": -2.7757656574249268,
1071
+ "logits/rejected": -2.7493858337402344,
1072
+ "logps/chosen": -311.73321533203125,
1073
+ "logps/rejected": -285.9647216796875,
1074
+ "loss": 0.2968,
1075
+ "pred_label": 5371.77490234375,
1076
+ "rewards/accuracies": 0.6625000238418579,
1077
+ "rewards/chosen": -1.636950135231018,
1078
+ "rewards/margins": 1.971967339515686,
1079
+ "rewards/rejected": -3.6089179515838623,
1080
  "step": 660,
1081
+ "use_label": 5110.22509765625
1082
  },
1083
  {
1084
  "epoch": 0.7,
1085
  "learning_rate": 1.6589057043073342e-05,
1086
+ "logits/chosen": -2.7224373817443848,
1087
+ "logits/rejected": -2.686645030975342,
1088
+ "logps/chosen": -299.3620300292969,
1089
+ "logps/rejected": -292.6378173828125,
1090
+ "loss": 0.2702,
1091
+ "pred_label": 5477.2001953125,
1092
+ "rewards/accuracies": 0.7875000238418579,
1093
+ "rewards/chosen": -1.828905701637268,
1094
+ "rewards/margins": 3.295133113861084,
1095
+ "rewards/rejected": -5.124039173126221,
1096
  "step": 670,
1097
+ "use_label": 5164.7998046875
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
  "learning_rate": 1.60069848661234e-05,
1102
+ "logits/chosen": -2.7378199100494385,
1103
+ "logits/rejected": -2.7310595512390137,
1104
+ "logps/chosen": -304.2184753417969,
1105
+ "logps/rejected": -292.1669921875,
1106
+ "loss": 0.3204,
1107
+ "pred_label": 5576.47509765625,
1108
+ "rewards/accuracies": 0.668749988079071,
1109
+ "rewards/chosen": -1.7283070087432861,
1110
+ "rewards/margins": 1.6653932332992554,
1111
+ "rewards/rejected": -3.393700361251831,
1112
  "step": 680,
1113
+ "use_label": 5225.52490234375
1114
  },
1115
  {
1116
  "epoch": 0.72,
1117
  "learning_rate": 1.5424912689173458e-05,
1118
+ "logits/chosen": -2.7175381183624268,
1119
+ "logits/rejected": -2.7000772953033447,
1120
+ "logps/chosen": -266.01263427734375,
1121
+ "logps/rejected": -258.3471374511719,
1122
+ "loss": 0.258,
1123
+ "pred_label": 5677.625,
1124
+ "rewards/accuracies": 0.7749999761581421,
1125
+ "rewards/chosen": -1.56284499168396,
1126
+ "rewards/margins": 3.3434224128723145,
1127
+ "rewards/rejected": -4.9062676429748535,
1128
  "step": 690,
1129
+ "use_label": 5284.375
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 1.4842840512223516e-05,
1134
+ "logits/chosen": -2.735502243041992,
1135
+ "logits/rejected": -2.690195322036743,
1136
+ "logps/chosen": -293.5663146972656,
1137
+ "logps/rejected": -299.68121337890625,
1138
+ "loss": 0.2598,
1139
+ "pred_label": 5784.4248046875,
1140
+ "rewards/accuracies": 0.7250000238418579,
1141
+ "rewards/chosen": -1.7876224517822266,
1142
+ "rewards/margins": 3.3526370525360107,
1143
+ "rewards/rejected": -5.140259742736816,
1144
  "step": 700,
1145
+ "use_label": 5337.5751953125
1146
  },
1147
  {
1148
  "epoch": 0.74,
1149
  "learning_rate": 1.4260768335273575e-05,
1150
+ "logits/chosen": -2.7820751667022705,
1151
+ "logits/rejected": -2.74092173576355,
1152
+ "logps/chosen": -305.2626953125,
1153
+ "logps/rejected": -284.5972900390625,
1154
+ "loss": 0.2792,
1155
+ "pred_label": 5887.0498046875,
1156
+ "rewards/accuracies": 0.7250000238418579,
1157
+ "rewards/chosen": -1.5688129663467407,
1158
+ "rewards/margins": 2.764045000076294,
1159
+ "rewards/rejected": -4.332858085632324,
1160
  "step": 710,
1161
+ "use_label": 5394.9501953125
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
  "learning_rate": 1.3678696158323633e-05,
1166
+ "logits/chosen": -2.741055727005005,
1167
+ "logits/rejected": -2.739257335662842,
1168
+ "logps/chosen": -288.95611572265625,
1169
+ "logps/rejected": -306.3753967285156,
1170
+ "loss": 0.2461,
1171
+ "pred_label": 5995.5498046875,
1172
+ "rewards/accuracies": 0.78125,
1173
+ "rewards/chosen": -1.7503833770751953,
1174
+ "rewards/margins": 3.5468757152557373,
1175
+ "rewards/rejected": -5.297258377075195,
1176
  "step": 720,
1177
+ "use_label": 5446.4501953125
1178
  },
1179
  {
1180
  "epoch": 0.76,
1181
  "learning_rate": 1.309662398137369e-05,
1182
+ "logits/chosen": -2.7259597778320312,
1183
+ "logits/rejected": -2.719268321990967,
1184
+ "logps/chosen": -315.80181884765625,
1185
+ "logps/rejected": -282.5977783203125,
1186
+ "loss": 0.2265,
1187
+ "pred_label": 6100.625,
1188
+ "rewards/accuracies": 0.7124999761581421,
1189
+ "rewards/chosen": -1.8351987600326538,
1190
+ "rewards/margins": 2.257732629776001,
1191
+ "rewards/rejected": -4.092931270599365,
1192
  "step": 730,
1193
+ "use_label": 5501.375
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
  "learning_rate": 1.2514551804423749e-05,
1198
+ "logits/chosen": -2.704035758972168,
1199
+ "logits/rejected": -2.694396495819092,
1200
+ "logps/chosen": -332.0672912597656,
1201
+ "logps/rejected": -348.5420837402344,
1202
+ "loss": 0.254,
1203
+ "pred_label": 6200.5751953125,
1204
+ "rewards/accuracies": 0.7749999761581421,
1205
+ "rewards/chosen": -2.469543933868408,
1206
+ "rewards/margins": 3.7357654571533203,
1207
+ "rewards/rejected": -6.2053093910217285,
1208
  "step": 740,
1209
+ "use_label": 5561.4248046875
1210
  },
1211
  {
1212
  "epoch": 0.79,
1213
  "learning_rate": 1.1932479627473807e-05,
1214
+ "logits/chosen": -2.717568874359131,
1215
+ "logits/rejected": -2.6717875003814697,
1216
+ "logps/chosen": -285.0014343261719,
1217
+ "logps/rejected": -322.31671142578125,
1218
+ "loss": 0.2139,
1219
+ "pred_label": 6312.14990234375,
1220
+ "rewards/accuracies": 0.762499988079071,
1221
+ "rewards/chosen": -2.451322078704834,
1222
+ "rewards/margins": 3.727611541748047,
1223
+ "rewards/rejected": -6.178933620452881,
1224
  "step": 750,
1225
+ "use_label": 5609.85009765625
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
  "learning_rate": 1.1350407450523866e-05,
1230
+ "logits/chosen": -2.719574451446533,
1231
+ "logits/rejected": -2.7501304149627686,
1232
+ "logps/chosen": -310.88677978515625,
1233
+ "logps/rejected": -308.3591003417969,
1234
+ "loss": 0.2668,
1235
+ "pred_label": 6422.97509765625,
1236
+ "rewards/accuracies": 0.768750011920929,
1237
+ "rewards/chosen": -1.5423939228057861,
1238
+ "rewards/margins": 3.0822057723999023,
1239
+ "rewards/rejected": -4.624599456787109,
1240
  "step": 760,
1241
+ "use_label": 5659.02490234375
1242
  },
1243
  {
1244
  "epoch": 0.81,
1245
  "learning_rate": 1.0768335273573923e-05,
1246
+ "logits/chosen": -2.746647357940674,
1247
+ "logits/rejected": -2.733283519744873,
1248
+ "logps/chosen": -320.37213134765625,
1249
+ "logps/rejected": -333.1225280761719,
1250
+ "loss": 0.248,
1251
+ "pred_label": 6535.22509765625,
1252
+ "rewards/accuracies": 0.737500011920929,
1253
+ "rewards/chosen": -1.807631492614746,
1254
+ "rewards/margins": 3.0322647094726562,
1255
+ "rewards/rejected": -4.839896202087402,
1256
  "step": 770,
1257
+ "use_label": 5706.77490234375
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
  "learning_rate": 1.0186263096623982e-05,
1262
+ "logits/chosen": -2.7445006370544434,
1263
+ "logits/rejected": -2.7238264083862305,
1264
+ "logps/chosen": -303.70404052734375,
1265
+ "logps/rejected": -301.3880615234375,
1266
+ "loss": 0.2467,
1267
+ "pred_label": 6641.10009765625,
1268
+ "rewards/accuracies": 0.737500011920929,
1269
+ "rewards/chosen": -2.2510251998901367,
1270
+ "rewards/margins": 3.1742377281188965,
1271
+ "rewards/rejected": -5.425262928009033,
1272
  "step": 780,
1273
+ "use_label": 5760.89990234375
1274
  },
1275
  {
1276
  "epoch": 0.83,
1277
  "learning_rate": 9.60419091967404e-06,
1278
+ "logits/chosen": -2.7392661571502686,
1279
+ "logits/rejected": -2.7158615589141846,
1280
+ "logps/chosen": -294.07037353515625,
1281
+ "logps/rejected": -322.58758544921875,
1282
+ "loss": 0.2523,
1283
+ "pred_label": 6745.9501953125,
1284
+ "rewards/accuracies": 0.7124999761581421,
1285
+ "rewards/chosen": -2.626100778579712,
1286
+ "rewards/margins": 3.5596566200256348,
1287
+ "rewards/rejected": -6.185757637023926,
1288
  "step": 790,
1289
+ "use_label": 5816.0498046875
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 9.022118742724098e-06,
1294
+ "logits/chosen": -2.6276087760925293,
1295
+ "logits/rejected": -2.5911715030670166,
1296
+ "logps/chosen": -343.8062438964844,
1297
+ "logps/rejected": -343.41070556640625,
1298
+ "loss": 0.2253,
1299
+ "pred_label": 6867.2998046875,
1300
+ "rewards/accuracies": 0.737500011920929,
1301
+ "rewards/chosen": -2.9334328174591064,
1302
+ "rewards/margins": 3.8014297485351562,
1303
+ "rewards/rejected": -6.73486328125,
1304
  "step": 800,
1305
+ "use_label": 5854.7001953125
1306
  },
1307
  {
1308
  "epoch": 0.85,
1309
  "learning_rate": 8.440046565774158e-06,
1310
+ "logits/chosen": -2.6645331382751465,
1311
+ "logits/rejected": -2.640127658843994,
1312
+ "logps/chosen": -313.86480712890625,
1313
+ "logps/rejected": -287.8795471191406,
1314
+ "loss": 0.2183,
1315
+ "pred_label": 6983.14990234375,
1316
+ "rewards/accuracies": 0.6937500238418579,
1317
+ "rewards/chosen": -3.2840819358825684,
1318
+ "rewards/margins": 2.622573137283325,
1319
+ "rewards/rejected": -5.9066548347473145,
1320
  "step": 810,
1321
+ "use_label": 5898.85009765625
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
  "learning_rate": 7.857974388824214e-06,
1326
+ "logits/chosen": -2.684762477874756,
1327
+ "logits/rejected": -2.6731903553009033,
1328
+ "logps/chosen": -316.68157958984375,
1329
+ "logps/rejected": -307.9013671875,
1330
+ "loss": 0.2752,
1331
+ "pred_label": 7098.10009765625,
1332
+ "rewards/accuracies": 0.768750011920929,
1333
+ "rewards/chosen": -2.3363418579101562,
1334
+ "rewards/margins": 4.297635078430176,
1335
+ "rewards/rejected": -6.63397741317749,
1336
  "step": 820,
1337
+ "use_label": 5943.89990234375
1338
  },
1339
  {
1340
  "epoch": 0.87,
1341
  "learning_rate": 7.275902211874273e-06,
1342
+ "logits/chosen": -2.7039098739624023,
1343
+ "logits/rejected": -2.6936678886413574,
1344
+ "logps/chosen": -289.11761474609375,
1345
+ "logps/rejected": -341.92388916015625,
1346
+ "loss": 0.2433,
1347
+ "pred_label": 7206.47509765625,
1348
+ "rewards/accuracies": 0.71875,
1349
+ "rewards/chosen": -2.6304259300231934,
1350
+ "rewards/margins": 3.5417304039001465,
1351
+ "rewards/rejected": -6.172156810760498,
1352
  "step": 830,
1353
+ "use_label": 5995.52490234375
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
  "learning_rate": 6.693830034924331e-06,
1358
+ "logits/chosen": -2.6950812339782715,
1359
+ "logits/rejected": -2.695385456085205,
1360
+ "logps/chosen": -295.2992858886719,
1361
+ "logps/rejected": -315.1665954589844,
1362
+ "loss": 0.2626,
1363
+ "pred_label": 7316.375,
1364
+ "rewards/accuracies": 0.7124999761581421,
1365
+ "rewards/chosen": -2.7282135486602783,
1366
+ "rewards/margins": 2.3400490283966064,
1367
+ "rewards/rejected": -5.068262577056885,
1368
  "step": 840,
1369
+ "use_label": 6045.625
1370
  },
1371
  {
1372
  "epoch": 0.89,
1373
  "learning_rate": 6.111757857974389e-06,
1374
+ "logits/chosen": -2.691633701324463,
1375
+ "logits/rejected": -2.7306087017059326,
1376
+ "logps/chosen": -319.0354919433594,
1377
+ "logps/rejected": -299.2270812988281,
1378
+ "loss": 0.2547,
1379
+ "pred_label": 7422.2001953125,
1380
+ "rewards/accuracies": 0.7749999761581421,
1381
+ "rewards/chosen": -2.400865316390991,
1382
+ "rewards/margins": 3.1857247352600098,
1383
+ "rewards/rejected": -5.586589813232422,
1384
  "step": 850,
1385
+ "use_label": 6099.7998046875
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
  "learning_rate": 5.529685681024447e-06,
1390
+ "logits/chosen": -2.6894938945770264,
1391
+ "logits/rejected": -2.6826186180114746,
1392
+ "logps/chosen": -314.03753662109375,
1393
+ "logps/rejected": -334.26690673828125,
1394
+ "loss": 0.2358,
1395
+ "pred_label": 7535.10009765625,
1396
+ "rewards/accuracies": 0.75,
1397
+ "rewards/chosen": -2.9154160022735596,
1398
+ "rewards/margins": 3.2621803283691406,
1399
+ "rewards/rejected": -6.177596092224121,
1400
  "step": 860,
1401
+ "use_label": 6146.89990234375
1402
  },
1403
  {
1404
  "epoch": 0.91,
1405
  "learning_rate": 4.947613504074506e-06,
1406
+ "logits/chosen": -2.6914477348327637,
1407
+ "logits/rejected": -2.6861438751220703,
1408
+ "logps/chosen": -267.033203125,
1409
+ "logps/rejected": -298.70428466796875,
1410
+ "loss": 0.2361,
1411
+ "pred_label": 7641.60009765625,
1412
+ "rewards/accuracies": 0.737500011920929,
1413
+ "rewards/chosen": -2.404568910598755,
1414
+ "rewards/margins": 3.4409728050231934,
1415
+ "rewards/rejected": -5.845541477203369,
1416
  "step": 870,
1417
+ "use_label": 6200.39990234375
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
  "learning_rate": 4.3655413271245635e-06,
1422
+ "logits/chosen": -2.7052195072174072,
1423
+ "logits/rejected": -2.6577229499816895,
1424
+ "logps/chosen": -311.93719482421875,
1425
+ "logps/rejected": -314.8600769042969,
1426
+ "loss": 0.2224,
1427
+ "pred_label": 7745.8251953125,
1428
+ "rewards/accuracies": 0.7749999761581421,
1429
+ "rewards/chosen": -2.5347952842712402,
1430
+ "rewards/margins": 2.786163330078125,
1431
+ "rewards/rejected": -5.320958614349365,
1432
  "step": 880,
1433
+ "use_label": 6256.1748046875
1434
  },
1435
  {
1436
  "epoch": 0.93,
1437
  "learning_rate": 3.7834691501746217e-06,
1438
+ "logits/chosen": -2.718468189239502,
1439
+ "logits/rejected": -2.710252523422241,
1440
+ "logps/chosen": -304.1473693847656,
1441
+ "logps/rejected": -332.4493103027344,
1442
+ "loss": 0.2246,
1443
+ "pred_label": 7864.375,
1444
+ "rewards/accuracies": 0.793749988079071,
1445
+ "rewards/chosen": -2.2862813472747803,
1446
+ "rewards/margins": 3.7558963298797607,
1447
+ "rewards/rejected": -6.042177677154541,
1448
  "step": 890,
1449
+ "use_label": 6297.625
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 3.2013969732246805e-06,
1454
+ "logits/chosen": -2.6227777004241943,
1455
+ "logits/rejected": -2.571324110031128,
1456
+ "logps/chosen": -307.7371826171875,
1457
+ "logps/rejected": -324.693359375,
1458
+ "loss": 0.2493,
1459
+ "pred_label": 7978.625,
1460
+ "rewards/accuracies": 0.7562500238418579,
1461
+ "rewards/chosen": -2.859910488128662,
1462
+ "rewards/margins": 3.757611036300659,
1463
+ "rewards/rejected": -6.617520809173584,
1464
  "step": 900,
1465
+ "use_label": 6343.375
1466
  },
1467
  {
1468
  "epoch": 0.95,
1469
  "learning_rate": 2.6193247962747383e-06,
1470
+ "logits/chosen": -2.701064109802246,
1471
+ "logits/rejected": -2.665743350982666,
1472
+ "logps/chosen": -323.77337646484375,
1473
+ "logps/rejected": -322.24432373046875,
1474
+ "loss": 0.238,
1475
+ "pred_label": 8087.5,
1476
+ "rewards/accuracies": 0.75,
1477
+ "rewards/chosen": -2.7357375621795654,
1478
+ "rewards/margins": 3.1752612590789795,
1479
+ "rewards/rejected": -5.910999298095703,
1480
  "step": 910,
1481
+ "use_label": 6394.5
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
  "learning_rate": 2.037252619324796e-06,
1486
+ "logits/chosen": -2.639965772628784,
1487
+ "logits/rejected": -2.6299610137939453,
1488
+ "logps/chosen": -327.05975341796875,
1489
+ "logps/rejected": -325.63909912109375,
1490
+ "loss": 0.2258,
1491
+ "pred_label": 8202.6748046875,
1492
+ "rewards/accuracies": 0.737500011920929,
1493
+ "rewards/chosen": -3.017204761505127,
1494
+ "rewards/margins": 3.5882301330566406,
1495
+ "rewards/rejected": -6.605435371398926,
1496
  "step": 920,
1497
+ "use_label": 6439.3251953125
1498
  },
1499
  {
1500
  "epoch": 0.97,
1501
  "learning_rate": 1.4551804423748545e-06,
1502
+ "logits/chosen": -2.69065260887146,
1503
+ "logits/rejected": -2.6345713138580322,
1504
+ "logps/chosen": -286.16278076171875,
1505
+ "logps/rejected": -308.4196472167969,
1506
+ "loss": 0.2207,
1507
+ "pred_label": 8314.349609375,
1508
  "rewards/accuracies": 0.8125,
1509
+ "rewards/chosen": -1.7949655055999756,
1510
+ "rewards/margins": 4.867483139038086,
1511
+ "rewards/rejected": -6.662448883056641,
1512
  "step": 930,
1513
+ "use_label": 6487.64990234375
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
  "learning_rate": 8.731082654249127e-07,
1518
+ "logits/chosen": -2.71934175491333,
1519
+ "logits/rejected": -2.67936372756958,
1520
+ "logps/chosen": -318.2463073730469,
1521
+ "logps/rejected": -326.56634521484375,
1522
+ "loss": 0.234,
1523
+ "pred_label": 8430.5751953125,
1524
+ "rewards/accuracies": 0.7250000238418579,
1525
+ "rewards/chosen": -3.0853824615478516,
1526
+ "rewards/margins": 2.8827648162841797,
1527
+ "rewards/rejected": -5.968147277832031,
1528
  "step": 940,
1529
+ "use_label": 6531.4248046875
1530
  },
1531
  {
1532
  "epoch": 0.99,
1533
  "learning_rate": 2.910360884749709e-07,
1534
+ "logits/chosen": -2.6776463985443115,
1535
+ "logits/rejected": -2.6152024269104004,
1536
+ "logps/chosen": -300.53662109375,
1537
+ "logps/rejected": -322.4374694824219,
1538
+ "loss": 0.2135,
1539
+ "pred_label": 8549.625,
1540
+ "rewards/accuracies": 0.7562500238418579,
1541
+ "rewards/chosen": -3.2081990242004395,
1542
+ "rewards/margins": 3.740739107131958,
1543
+ "rewards/rejected": -6.94893741607666,
1544
  "step": 950,
1545
+ "use_label": 6572.375
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
+ "eval_logits/chosen": -2.6974706649780273,
1550
+ "eval_logits/rejected": -2.665019989013672,
1551
+ "eval_logps/chosen": -311.6889953613281,
1552
+ "eval_logps/rejected": -322.95947265625,
1553
+ "eval_loss": 0.2408759742975235,
1554
+ "eval_pred_label": 8833.576171875,
1555
+ "eval_rewards/accuracies": 0.734000027179718,
1556
+ "eval_rewards/chosen": -2.7431609630584717,
1557
+ "eval_rewards/margins": 3.6228184700012207,
1558
+ "eval_rewards/rejected": -6.36598014831543,
1559
+ "eval_runtime": 452.5439,
1560
+ "eval_samples_per_second": 4.419,
1561
+ "eval_steps_per_second": 0.276,
1562
+ "eval_use_label": 6698.423828125,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
+ "train_loss": 0.31699458866219243,
1570
+ "train_runtime": 25218.7851,
1571
+ "train_samples_per_second": 2.424,
1572
  "train_steps_per_second": 0.038
1573
  }
1574
  ],