jikaixuan commited on
Commit
1bdf2f2
1 Parent(s): 64c04ad

Model save

Browse files
Files changed (6) hide show
  1. README.md +12 -12
  2. adapter_model.safetensors +1 -1
  3. all_results.json +17 -17
  4. eval_results.json +14 -14
  5. train_results.json +3 -3
  6. trainer_state.json +1028 -1028
README.md CHANGED
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.0116
19
- - Rewards/chosen: -1343.7761
20
- - Rewards/rejected: -1133.7241
21
- - Rewards/accuracies: 0.4740
22
- - Rewards/margins: -210.0521
23
- - Logps/rejected: -11596.5400
24
- - Logps/chosen: -13722.0166
25
- - Logits/rejected: 13.8132
26
- - Logits/chosen: 13.8244
27
- - Use Label: 1746.1600
28
- - Pred Label: 14285.8398
29
 
30
  ## Model description
31
 
@@ -62,7 +62,7 @@ The following hyperparameters were used during training:
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
- | 0.0144 | 1.0 | 955 | 0.0116 | -1343.7761 | -1133.7241 | 0.4740 | -210.0521 | -11596.5400 | -13722.0166 | 13.8132 | 13.8244 | 1742.1600 | 13789.8398 |
66
 
67
 
68
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.0758
19
+ - Rewards/chosen: -15.6780
20
+ - Rewards/rejected: -27.9661
21
+ - Rewards/accuracies: 0.7080
22
+ - Rewards/margins: 12.2881
23
+ - Logps/rejected: -538.9609
24
+ - Logps/chosen: -441.0378
25
+ - Logits/rejected: -2.2748
26
+ - Logits/chosen: -2.3871
27
+ - Use Label: 2488.2161
28
+ - Pred Label: 13543.7842
29
 
30
  ## Model description
31
 
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
65
+ | 0.069 | 1.0 | 955 | 0.0758 | -15.6780 | -27.9661 | 0.7080 | 12.2881 | -538.9609 | -441.0378 | -2.2748 | -2.3871 | 2447.2161 | 13084.7842 |
66
 
67
 
68
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f8854ee7294074c5457595568f950a940efbea9685cdb34d3612345426abfcf
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeeffd360fdf6ce49d5baeca66af17570725e7e6e43a43b0a4910baa598329f4
3
  size 218138576
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 13.824411392211914,
4
- "eval_logits/rejected": 13.813151359558105,
5
- "eval_logps/chosen": -13722.0166015625,
6
- "eval_logps/rejected": -11596.5400390625,
7
- "eval_loss": 0.011624496430158615,
8
- "eval_pred_label": 14285.83984375,
9
- "eval_rewards/accuracies": 0.4740000069141388,
10
- "eval_rewards/chosen": -1343.776123046875,
11
- "eval_rewards/margins": -210.05210876464844,
12
- "eval_rewards/rejected": -1133.72412109375,
13
- "eval_runtime": 455.4362,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.391,
16
- "eval_steps_per_second": 0.274,
17
- "eval_use_label": 1746.1600341796875,
18
- "train_loss": 0.08065580570807007,
19
- "train_runtime": 25294.0492,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 2.417,
22
  "train_steps_per_second": 0.038
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.3870656490325928,
4
+ "eval_logits/rejected": -2.274824380874634,
5
+ "eval_logps/chosen": -441.0377502441406,
6
+ "eval_logps/rejected": -538.9608764648438,
7
+ "eval_loss": 0.07583338022232056,
8
+ "eval_pred_label": 13543.7841796875,
9
+ "eval_rewards/accuracies": 0.7080000042915344,
10
+ "eval_rewards/chosen": -15.678034782409668,
11
+ "eval_rewards/margins": 12.288079261779785,
12
+ "eval_rewards/rejected": -27.966114044189453,
13
+ "eval_runtime": 449.5661,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.449,
16
+ "eval_steps_per_second": 0.278,
17
+ "eval_use_label": 2488.216064453125,
18
+ "train_loss": 0.12920980815488006,
19
+ "train_runtime": 25162.7962,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 2.43,
22
  "train_steps_per_second": 0.038
23
  }
eval_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 13.824411392211914,
4
- "eval_logits/rejected": 13.813151359558105,
5
- "eval_logps/chosen": -13722.0166015625,
6
- "eval_logps/rejected": -11596.5400390625,
7
- "eval_loss": 0.011624496430158615,
8
- "eval_pred_label": 14285.83984375,
9
- "eval_rewards/accuracies": 0.4740000069141388,
10
- "eval_rewards/chosen": -1343.776123046875,
11
- "eval_rewards/margins": -210.05210876464844,
12
- "eval_rewards/rejected": -1133.72412109375,
13
- "eval_runtime": 455.4362,
14
  "eval_samples": 2000,
15
- "eval_samples_per_second": 4.391,
16
- "eval_steps_per_second": 0.274,
17
- "eval_use_label": 1746.1600341796875
18
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -2.3870656490325928,
4
+ "eval_logits/rejected": -2.274824380874634,
5
+ "eval_logps/chosen": -441.0377502441406,
6
+ "eval_logps/rejected": -538.9608764648438,
7
+ "eval_loss": 0.07583338022232056,
8
+ "eval_pred_label": 13543.7841796875,
9
+ "eval_rewards/accuracies": 0.7080000042915344,
10
+ "eval_rewards/chosen": -15.678034782409668,
11
+ "eval_rewards/margins": 12.288079261779785,
12
+ "eval_rewards/rejected": -27.966114044189453,
13
+ "eval_runtime": 449.5661,
14
  "eval_samples": 2000,
15
+ "eval_samples_per_second": 4.449,
16
+ "eval_steps_per_second": 0.278,
17
+ "eval_use_label": 2488.216064453125
18
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.08065580570807007,
4
- "train_runtime": 25294.0492,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 2.417,
7
  "train_steps_per_second": 0.038
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.12920980815488006,
4
+ "train_runtime": 25162.7962,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 2.43,
7
  "train_steps_per_second": 0.038
8
  }
trainer_state.json CHANGED
@@ -75,1500 +75,1500 @@
75
  {
76
  "epoch": 0.04,
77
  "learning_rate": 2.0833333333333336e-05,
78
- "logits/chosen": -2.840946674346924,
79
- "logits/rejected": -2.8493659496307373,
80
- "logps/chosen": -281.32928466796875,
81
- "logps/rejected": -277.8607482910156,
82
- "loss": 0.6339,
83
- "pred_label": 0.4749999940395355,
84
- "rewards/accuracies": 0.6875,
85
- "rewards/chosen": 0.02641097828745842,
86
- "rewards/margins": 0.2079576551914215,
87
- "rewards/rejected": -0.1815466731786728,
88
  "step": 40,
89
- "use_label": 561.5250244140625
90
  },
91
  {
92
  "epoch": 0.05,
93
  "learning_rate": 2.604166666666667e-05,
94
- "logits/chosen": -2.8537254333496094,
95
- "logits/rejected": -2.8391127586364746,
96
- "logps/chosen": -266.79296875,
97
- "logps/rejected": -262.0001220703125,
98
- "loss": 0.5836,
99
- "pred_label": 5.775000095367432,
100
- "rewards/accuracies": 0.643750011920929,
101
- "rewards/chosen": -0.06846104562282562,
102
- "rewards/margins": 0.33990827202796936,
103
- "rewards/rejected": -0.4083693027496338,
104
  "step": 50,
105
- "use_label": 716.2249755859375
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.125e-05,
110
- "logits/chosen": -2.8152918815612793,
111
- "logits/rejected": -2.804291009902954,
112
- "logps/chosen": -301.41326904296875,
113
- "logps/rejected": -291.53997802734375,
114
- "loss": 0.5613,
115
- "pred_label": 28.600000381469727,
116
- "rewards/accuracies": 0.699999988079071,
117
- "rewards/chosen": -0.09980294108390808,
118
- "rewards/margins": 0.4436502456665039,
119
- "rewards/rejected": -0.5434532165527344,
120
  "step": 60,
121
- "use_label": 853.4000244140625
122
  },
123
  {
124
  "epoch": 0.07,
125
  "learning_rate": 3.6458333333333336e-05,
126
- "logits/chosen": -2.8159656524658203,
127
- "logits/rejected": -2.807382345199585,
128
- "logps/chosen": -295.85113525390625,
129
- "logps/rejected": -281.4297180175781,
130
- "loss": 0.4736,
131
- "pred_label": 72.82499694824219,
132
- "rewards/accuracies": 0.731249988079071,
133
- "rewards/chosen": -0.15376296639442444,
134
- "rewards/margins": 0.6926594972610474,
135
- "rewards/rejected": -0.8464224934577942,
136
  "step": 70,
137
- "use_label": 969.1749877929688
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.166666666666667e-05,
142
- "logits/chosen": -2.760671377182007,
143
- "logits/rejected": -2.745089292526245,
144
- "logps/chosen": -309.682861328125,
145
- "logps/rejected": -294.1726989746094,
146
- "loss": 0.3682,
147
- "pred_label": 129.4499969482422,
148
- "rewards/accuracies": 0.731249988079071,
149
- "rewards/chosen": -0.4377492070198059,
150
- "rewards/margins": 1.0782606601715088,
151
- "rewards/rejected": -1.516010046005249,
152
  "step": 80,
153
- "use_label": 1072.550048828125
154
  },
155
  {
156
  "epoch": 0.09,
157
  "learning_rate": 4.6875e-05,
158
- "logits/chosen": -2.689037799835205,
159
- "logits/rejected": -2.7456631660461426,
160
- "logps/chosen": -298.6680603027344,
161
- "logps/rejected": -281.3171081542969,
162
- "loss": 0.3626,
163
- "pred_label": 213.02499389648438,
164
- "rewards/accuracies": 0.731249988079071,
165
- "rewards/chosen": -0.28849169611930847,
166
- "rewards/margins": 1.2157728672027588,
167
- "rewards/rejected": -1.5042643547058105,
168
  "step": 90,
169
- "use_label": 1148.9749755859375
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 4.976717112922003e-05,
174
- "logits/chosen": -2.722339153289795,
175
- "logits/rejected": -2.718428611755371,
176
- "logps/chosen": -287.2553405761719,
177
- "logps/rejected": -325.00335693359375,
178
- "loss": 0.3168,
179
- "pred_label": 303.125,
180
- "rewards/accuracies": 0.71875,
181
- "rewards/chosen": -1.3123645782470703,
182
- "rewards/margins": 1.8343286514282227,
183
- "rewards/rejected": -3.146693468093872,
184
  "step": 100,
185
- "use_label": 1218.875
186
  },
187
  {
188
  "epoch": 0.12,
189
  "learning_rate": 4.918509895227008e-05,
190
- "logits/chosen": -2.694249391555786,
191
- "logits/rejected": -2.633723497390747,
192
- "logps/chosen": -288.16387939453125,
193
- "logps/rejected": -293.7809143066406,
194
- "loss": 0.2607,
195
- "pred_label": 402.625,
196
  "rewards/accuracies": 0.675000011920929,
197
- "rewards/chosen": -2.3489272594451904,
198
- "rewards/margins": 2.1118221282958984,
199
- "rewards/rejected": -4.46074914932251,
200
  "step": 110,
201
- "use_label": 1279.375
202
  },
203
  {
204
  "epoch": 0.13,
205
  "learning_rate": 4.860302677532014e-05,
206
- "logits/chosen": -2.718721866607666,
207
- "logits/rejected": -2.699587345123291,
208
- "logps/chosen": -292.71112060546875,
209
- "logps/rejected": -279.4311218261719,
210
- "loss": 0.2879,
211
- "pred_label": 507.5,
212
- "rewards/accuracies": 0.6937500238418579,
213
- "rewards/chosen": -1.3198258876800537,
214
- "rewards/margins": 1.854914903640747,
215
- "rewards/rejected": -3.1747405529022217,
216
  "step": 120,
217
- "use_label": 1334.5
218
  },
219
  {
220
  "epoch": 0.14,
221
  "learning_rate": 4.80209545983702e-05,
222
- "logits/chosen": -2.7755086421966553,
223
- "logits/rejected": -2.7087435722351074,
224
- "logps/chosen": -329.43267822265625,
225
- "logps/rejected": -308.2383728027344,
226
- "loss": 0.2811,
227
- "pred_label": 611.7249755859375,
228
- "rewards/accuracies": 0.65625,
229
- "rewards/chosen": -2.216526508331299,
230
- "rewards/margins": 1.2961757183074951,
231
- "rewards/rejected": -3.512702226638794,
232
  "step": 130,
233
- "use_label": 1390.2750244140625
234
  },
235
  {
236
  "epoch": 0.15,
237
  "learning_rate": 4.743888242142026e-05,
238
- "logits/chosen": -2.6767191886901855,
239
- "logits/rejected": -2.643078327178955,
240
- "logps/chosen": -318.53924560546875,
241
- "logps/rejected": -322.80078125,
242
- "loss": 0.1985,
243
- "pred_label": 719.9749755859375,
244
- "rewards/accuracies": 0.6812499761581421,
245
- "rewards/chosen": -6.043245792388916,
246
- "rewards/margins": 2.734940767288208,
247
- "rewards/rejected": -8.778186798095703,
248
  "step": 140,
249
- "use_label": 1442.0250244140625
250
  },
251
  {
252
  "epoch": 0.16,
253
  "learning_rate": 4.685681024447032e-05,
254
- "logits/chosen": -2.008868932723999,
255
- "logits/rejected": -2.024056911468506,
256
- "logps/chosen": -2590.871337890625,
257
- "logps/rejected": -2381.74951171875,
258
- "loss": 0.037,
259
- "pred_label": 853.2249755859375,
260
- "rewards/accuracies": 0.512499988079071,
261
- "rewards/chosen": -229.59854125976562,
262
- "rewards/margins": -19.598337173461914,
263
- "rewards/rejected": -210.0001983642578,
264
  "step": 150,
265
- "use_label": 1468.7750244140625
266
  },
267
  {
268
  "epoch": 0.17,
269
  "learning_rate": 4.6274738067520374e-05,
270
- "logits/chosen": -3.4510104656219482,
271
- "logits/rejected": -3.4814345836639404,
272
- "logps/chosen": -5424.06201171875,
273
- "logps/rejected": -4965.0986328125,
274
- "loss": 0.0229,
275
- "pred_label": 1008.5750122070312,
276
- "rewards/accuracies": 0.4312500059604645,
277
- "rewards/chosen": -516.1680297851562,
278
- "rewards/margins": -46.562461853027344,
279
- "rewards/rejected": -469.6055603027344,
280
  "step": 160,
281
- "use_label": 1473.425048828125
282
  },
283
  {
284
  "epoch": 0.18,
285
  "learning_rate": 4.5692665890570435e-05,
286
- "logits/chosen": -3.6305947303771973,
287
- "logits/rejected": -3.6412110328674316,
288
- "logps/chosen": -5863.26220703125,
289
- "logps/rejected": -4459.16650390625,
290
- "loss": 0.0239,
291
- "pred_label": 1161.25,
292
- "rewards/accuracies": 0.40625,
293
- "rewards/chosen": -556.6785888671875,
294
- "rewards/margins": -135.7264862060547,
295
- "rewards/rejected": -420.95208740234375,
296
  "step": 170,
297
- "use_label": 1480.75
298
  },
299
  {
300
  "epoch": 0.19,
301
  "learning_rate": 4.511059371362049e-05,
302
- "logits/chosen": -3.826639175415039,
303
- "logits/rejected": -3.826951503753662,
304
- "logps/chosen": -5895.041015625,
305
- "logps/rejected": -5085.115234375,
306
- "loss": 0.021,
307
- "pred_label": 1315.800048828125,
308
- "rewards/accuracies": 0.4375,
309
- "rewards/chosen": -563.9112548828125,
310
- "rewards/margins": -82.46792602539062,
311
- "rewards/rejected": -481.443359375,
312
  "step": 180,
313
- "use_label": 1486.199951171875
314
  },
315
  {
316
  "epoch": 0.2,
317
  "learning_rate": 4.452852153667055e-05,
318
- "logits/chosen": -3.8287880420684814,
319
- "logits/rejected": -3.829810619354248,
320
- "logps/chosen": -6264.6552734375,
321
- "logps/rejected": -4964.57666015625,
322
- "loss": 0.0083,
323
- "pred_label": 1472.0,
324
- "rewards/accuracies": 0.3687500059604645,
325
- "rewards/chosen": -595.7398681640625,
326
- "rewards/margins": -125.386474609375,
327
- "rewards/rejected": -470.3534240722656,
328
  "step": 190,
329
- "use_label": 1490.0
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 4.394644935972061e-05,
334
- "logits/chosen": -3.800830364227295,
335
- "logits/rejected": -3.8033287525177,
336
- "logps/chosen": -5603.17431640625,
337
- "logps/rejected": -5157.21826171875,
338
- "loss": 0.0242,
339
- "pred_label": 1629.2249755859375,
340
- "rewards/accuracies": 0.44999998807907104,
341
- "rewards/chosen": -534.1845092773438,
342
- "rewards/margins": -45.28679656982422,
343
- "rewards/rejected": -488.897705078125,
344
  "step": 200,
345
- "use_label": 1492.7750244140625
346
  },
347
  {
348
  "epoch": 0.22,
349
  "learning_rate": 4.336437718277067e-05,
350
- "logits/chosen": -3.7285819053649902,
351
- "logits/rejected": -3.7191810607910156,
352
- "logps/chosen": -6087.337890625,
353
- "logps/rejected": -5075.240234375,
354
- "loss": 0.0165,
355
- "pred_label": 1786.125,
356
- "rewards/accuracies": 0.4312500059604645,
357
- "rewards/chosen": -578.6089477539062,
358
- "rewards/margins": -96.50392150878906,
359
- "rewards/rejected": -482.1050720214844,
360
  "step": 210,
361
- "use_label": 1495.875
362
  },
363
  {
364
  "epoch": 0.23,
365
  "learning_rate": 4.278230500582072e-05,
366
- "logits/chosen": -3.7653274536132812,
367
- "logits/rejected": -3.7663722038269043,
368
- "logps/chosen": -5865.328125,
369
- "logps/rejected": -5630.29248046875,
370
- "loss": 0.0263,
371
- "pred_label": 1942.125,
372
- "rewards/accuracies": 0.48750001192092896,
373
- "rewards/chosen": -559.0337524414062,
374
- "rewards/margins": -23.070148468017578,
375
- "rewards/rejected": -535.9635620117188,
376
  "step": 220,
377
- "use_label": 1499.875
378
  },
379
  {
380
  "epoch": 0.24,
381
  "learning_rate": 4.220023282887078e-05,
382
- "logits/chosen": -3.8049216270446777,
383
- "logits/rejected": -3.8088595867156982,
384
- "logps/chosen": -6366.97509765625,
385
- "logps/rejected": -5381.87548828125,
386
- "loss": 0.0175,
387
- "pred_label": 2098.27490234375,
388
- "rewards/accuracies": 0.42500001192092896,
389
- "rewards/chosen": -605.8801879882812,
390
- "rewards/margins": -94.10356140136719,
391
- "rewards/rejected": -511.776611328125,
392
  "step": 230,
393
- "use_label": 1503.7249755859375
394
  },
395
  {
396
  "epoch": 0.25,
397
  "learning_rate": 4.161816065192084e-05,
398
- "logits/chosen": -3.80168080329895,
399
- "logits/rejected": -3.802356243133545,
400
- "logps/chosen": -5398.353515625,
401
- "logps/rejected": -4512.5625,
402
- "loss": 0.0201,
403
- "pred_label": 2253.375,
404
- "rewards/accuracies": 0.3812499940395355,
405
- "rewards/chosen": -512.3775634765625,
406
- "rewards/margins": -83.42332458496094,
407
- "rewards/rejected": -428.95428466796875,
408
  "step": 240,
409
- "use_label": 1508.625
410
  },
411
  {
412
  "epoch": 0.26,
413
  "learning_rate": 4.10360884749709e-05,
414
- "logits/chosen": -3.815431594848633,
415
- "logits/rejected": -3.8156495094299316,
416
- "logps/chosen": -6113.8330078125,
417
- "logps/rejected": -5319.52783203125,
418
- "loss": 0.0204,
419
- "pred_label": 2408.97509765625,
420
- "rewards/accuracies": 0.44999998807907104,
421
- "rewards/chosen": -582.3192138671875,
422
- "rewards/margins": -77.30831146240234,
423
- "rewards/rejected": -505.01092529296875,
424
  "step": 250,
425
- "use_label": 1513.0250244140625
426
  },
427
  {
428
  "epoch": 0.27,
429
  "learning_rate": 4.045401629802096e-05,
430
- "logits/chosen": -3.8084158897399902,
431
- "logits/rejected": -3.8078300952911377,
432
- "logps/chosen": -5415.3056640625,
433
- "logps/rejected": -4981.9599609375,
434
- "loss": 0.0144,
435
- "pred_label": 2563.925048828125,
436
- "rewards/accuracies": 0.5062500238418579,
437
- "rewards/chosen": -516.6696166992188,
438
- "rewards/margins": -43.502445220947266,
439
- "rewards/rejected": -473.16717529296875,
440
  "step": 260,
441
- "use_label": 1518.074951171875
442
  },
443
  {
444
  "epoch": 0.28,
445
  "learning_rate": 3.9871944121071014e-05,
446
- "logits/chosen": -3.8132598400115967,
447
- "logits/rejected": -3.8127427101135254,
448
- "logps/chosen": -5882.3447265625,
449
- "logps/rejected": -5165.20703125,
450
- "loss": 0.0155,
451
- "pred_label": 2719.97509765625,
452
- "rewards/accuracies": 0.4625000059604645,
453
- "rewards/chosen": -559.0473022460938,
454
- "rewards/margins": -70.0018310546875,
455
- "rewards/rejected": -489.0455017089844,
456
  "step": 270,
457
- "use_label": 1522.0250244140625
458
  },
459
  {
460
  "epoch": 0.29,
461
  "learning_rate": 3.928987194412107e-05,
462
- "logits/chosen": -3.8188316822052,
463
- "logits/rejected": -3.818444013595581,
464
- "logps/chosen": -5914.48486328125,
465
- "logps/rejected": -5317.22021484375,
466
- "loss": 0.0222,
467
- "pred_label": 2876.02490234375,
468
- "rewards/accuracies": 0.4312500059604645,
469
- "rewards/chosen": -562.0521240234375,
470
- "rewards/margins": -56.552947998046875,
471
- "rewards/rejected": -505.4991760253906,
472
  "step": 280,
473
- "use_label": 1525.9749755859375
474
  },
475
  {
476
  "epoch": 0.3,
477
  "learning_rate": 3.870779976717113e-05,
478
- "logits/chosen": -3.819366931915283,
479
- "logits/rejected": -3.82012939453125,
480
- "logps/chosen": -5673.76416015625,
481
- "logps/rejected": -4572.4462890625,
482
- "loss": 0.0131,
483
- "pred_label": 3034.27490234375,
484
- "rewards/accuracies": 0.40625,
485
- "rewards/chosen": -538.0841674804688,
486
- "rewards/margins": -103.86119079589844,
487
- "rewards/rejected": -434.2230529785156,
488
  "step": 290,
489
- "use_label": 1527.7249755859375
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 3.812572759022119e-05,
494
- "logits/chosen": -3.801610231399536,
495
- "logits/rejected": -3.802950382232666,
496
- "logps/chosen": -5732.44921875,
497
- "logps/rejected": -4702.1435546875,
498
- "loss": 0.0155,
499
- "pred_label": 3192.824951171875,
500
- "rewards/accuracies": 0.38749998807907104,
501
- "rewards/chosen": -546.8770751953125,
502
- "rewards/margins": -100.64713287353516,
503
- "rewards/rejected": -446.2298889160156,
504
  "step": 300,
505
- "use_label": 1529.175048828125
506
  },
507
  {
508
  "epoch": 0.32,
509
  "learning_rate": 3.7543655413271246e-05,
510
- "logits/chosen": -3.7929720878601074,
511
- "logits/rejected": -3.7945361137390137,
512
- "logps/chosen": -5449.23046875,
513
- "logps/rejected": -5404.5537109375,
514
- "loss": 0.0162,
515
- "pred_label": 3350.675048828125,
516
- "rewards/accuracies": 0.5249999761581421,
517
- "rewards/chosen": -518.22998046875,
518
- "rewards/margins": -6.628878593444824,
519
- "rewards/rejected": -511.60107421875,
520
  "step": 310,
521
- "use_label": 1531.324951171875
522
  },
523
  {
524
  "epoch": 0.33,
525
  "learning_rate": 3.696158323632131e-05,
526
- "logits/chosen": -3.804478883743286,
527
- "logits/rejected": -3.808168411254883,
528
- "logps/chosen": -6255.1689453125,
529
- "logps/rejected": -5367.044921875,
530
- "loss": 0.0127,
531
- "pred_label": 3508.02490234375,
532
- "rewards/accuracies": 0.4437499940395355,
533
- "rewards/chosen": -595.9188842773438,
534
- "rewards/margins": -87.29522705078125,
535
- "rewards/rejected": -508.6236267089844,
536
  "step": 320,
537
- "use_label": 1533.9749755859375
538
  },
539
  {
540
  "epoch": 0.35,
541
  "learning_rate": 3.637951105937136e-05,
542
- "logits/chosen": -3.806224822998047,
543
- "logits/rejected": -3.809751510620117,
544
- "logps/chosen": -5673.6767578125,
545
- "logps/rejected": -4599.72119140625,
546
- "loss": 0.0221,
547
- "pred_label": 3666.10009765625,
548
- "rewards/accuracies": 0.4749999940395355,
549
- "rewards/chosen": -540.2728881835938,
550
- "rewards/margins": -103.72891998291016,
551
- "rewards/rejected": -436.5439453125,
552
  "step": 330,
553
- "use_label": 1535.9000244140625
554
  },
555
  {
556
  "epoch": 0.36,
557
  "learning_rate": 3.579743888242142e-05,
558
- "logits/chosen": -3.807875871658325,
559
- "logits/rejected": -3.8099751472473145,
560
- "logps/chosen": -5879.23486328125,
561
- "logps/rejected": -4872.8642578125,
562
- "loss": 0.0213,
563
- "pred_label": 3819.85009765625,
564
- "rewards/accuracies": 0.39375001192092896,
565
- "rewards/chosen": -559.0055541992188,
566
- "rewards/margins": -95.80415344238281,
567
- "rewards/rejected": -463.20135498046875,
568
  "step": 340,
569
- "use_label": 1542.1500244140625
570
  },
571
  {
572
  "epoch": 0.37,
573
  "learning_rate": 3.5215366705471484e-05,
574
- "logits/chosen": -3.8283824920654297,
575
- "logits/rejected": -3.8290863037109375,
576
- "logps/chosen": -6421.64453125,
577
- "logps/rejected": -5712.4833984375,
578
- "loss": 0.0197,
579
- "pred_label": 3974.35009765625,
580
- "rewards/accuracies": 0.375,
581
- "rewards/chosen": -612.4450073242188,
582
- "rewards/margins": -68.9627685546875,
583
- "rewards/rejected": -543.482177734375,
584
  "step": 350,
585
- "use_label": 1547.6500244140625
586
  },
587
  {
588
  "epoch": 0.38,
589
  "learning_rate": 3.463329452852154e-05,
590
- "logits/chosen": -3.8224472999572754,
591
- "logits/rejected": -3.822279691696167,
592
- "logps/chosen": -5800.58251953125,
593
- "logps/rejected": -5399.095703125,
594
- "loss": 0.0133,
595
- "pred_label": 4133.25,
596
- "rewards/accuracies": 0.42500001192092896,
597
- "rewards/chosen": -552.7788696289062,
598
- "rewards/margins": -39.389373779296875,
599
- "rewards/rejected": -513.3894653320312,
600
  "step": 360,
601
- "use_label": 1548.75
602
  },
603
  {
604
  "epoch": 0.39,
605
  "learning_rate": 3.40512223515716e-05,
606
- "logits/chosen": -3.8213393688201904,
607
- "logits/rejected": -3.8208725452423096,
608
- "logps/chosen": -5875.4296875,
609
- "logps/rejected": -5105.2080078125,
610
- "loss": 0.0144,
611
- "pred_label": 4289.4501953125,
612
- "rewards/accuracies": 0.4625000059604645,
613
- "rewards/chosen": -559.922607421875,
614
- "rewards/margins": -76.97410583496094,
615
- "rewards/rejected": -482.9485778808594,
616
  "step": 370,
617
- "use_label": 1552.550048828125
618
  },
619
  {
620
  "epoch": 0.4,
621
  "learning_rate": 3.3469150174621654e-05,
622
- "logits/chosen": -3.786717176437378,
623
- "logits/rejected": -3.7882437705993652,
624
- "logps/chosen": -6002.546875,
625
- "logps/rejected": -5331.99560546875,
626
- "loss": 0.0231,
627
- "pred_label": 4444.1748046875,
628
- "rewards/accuracies": 0.41874998807907104,
629
- "rewards/chosen": -571.731689453125,
630
- "rewards/margins": -64.58997344970703,
631
- "rewards/rejected": -507.1416931152344,
632
  "step": 380,
633
- "use_label": 1557.824951171875
634
  },
635
  {
636
  "epoch": 0.41,
637
  "learning_rate": 3.288707799767171e-05,
638
- "logits/chosen": -3.6485819816589355,
639
- "logits/rejected": -3.6548709869384766,
640
- "logps/chosen": -5633.61083984375,
641
- "logps/rejected": -4738.9384765625,
642
- "loss": 0.0232,
643
- "pred_label": 4600.875,
644
- "rewards/accuracies": 0.4312500059604645,
645
- "rewards/chosen": -535.0819091796875,
646
- "rewards/margins": -85.61624145507812,
647
- "rewards/rejected": -449.46563720703125,
648
  "step": 390,
649
- "use_label": 1561.125
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 3.2305005820721776e-05,
654
- "logits/chosen": -3.767920732498169,
655
- "logits/rejected": -3.767390489578247,
656
- "logps/chosen": -6094.14697265625,
657
- "logps/rejected": -5175.2177734375,
658
- "loss": 0.0231,
659
- "pred_label": 4752.77490234375,
660
- "rewards/accuracies": 0.41874998807907104,
661
- "rewards/chosen": -578.7096557617188,
662
- "rewards/margins": -87.12177276611328,
663
- "rewards/rejected": -491.58782958984375,
664
  "step": 400,
665
- "use_label": 1569.2249755859375
666
  },
667
  {
668
  "epoch": 0.43,
669
  "learning_rate": 3.172293364377183e-05,
670
- "logits/chosen": -3.7445671558380127,
671
- "logits/rejected": -3.754565715789795,
672
- "logps/chosen": -6168.5,
673
- "logps/rejected": -5233.85009765625,
674
- "loss": 0.0123,
675
- "pred_label": 4906.02490234375,
676
- "rewards/accuracies": 0.46875,
677
- "rewards/chosen": -587.0402221679688,
678
- "rewards/margins": -89.51008605957031,
679
- "rewards/rejected": -497.5301818847656,
680
  "step": 410,
681
- "use_label": 1575.9749755859375
682
  },
683
  {
684
  "epoch": 0.44,
685
  "learning_rate": 3.1140861466821885e-05,
686
- "logits/chosen": -3.766185760498047,
687
- "logits/rejected": -3.764925003051758,
688
- "logps/chosen": -4928.68701171875,
689
- "logps/rejected": -4211.3857421875,
690
- "loss": 0.0286,
691
- "pred_label": 5062.2001953125,
692
- "rewards/accuracies": 0.42500001192092896,
693
- "rewards/chosen": -468.8106994628906,
694
- "rewards/margins": -72.5836410522461,
695
- "rewards/rejected": -396.22705078125,
696
  "step": 420,
697
- "use_label": 1579.800048828125
698
  },
699
  {
700
  "epoch": 0.45,
701
  "learning_rate": 3.055878928987195e-05,
702
- "logits/chosen": -3.76971173286438,
703
- "logits/rejected": -3.766024351119995,
704
- "logps/chosen": -5624.13330078125,
705
- "logps/rejected": -5330.14599609375,
706
- "loss": 0.0128,
707
- "pred_label": 5218.125,
708
- "rewards/accuracies": 0.48750001192092896,
709
- "rewards/chosen": -534.6233520507812,
710
- "rewards/margins": -27.560443878173828,
711
- "rewards/rejected": -507.06292724609375,
712
  "step": 430,
713
- "use_label": 1583.875
714
  },
715
  {
716
  "epoch": 0.46,
717
  "learning_rate": 2.9976717112922005e-05,
718
- "logits/chosen": -3.8054771423339844,
719
- "logits/rejected": -3.8054962158203125,
720
- "logps/chosen": -5717.0419921875,
721
- "logps/rejected": -4923.8671875,
722
- "loss": 0.0159,
723
- "pred_label": 5373.875,
724
- "rewards/accuracies": 0.4375,
725
- "rewards/chosen": -543.377197265625,
726
- "rewards/margins": -76.2901382446289,
727
- "rewards/rejected": -467.0870666503906,
728
  "step": 440,
729
- "use_label": 1588.125
730
  },
731
  {
732
  "epoch": 0.47,
733
  "learning_rate": 2.939464493597206e-05,
734
- "logits/chosen": -3.7968783378601074,
735
- "logits/rejected": -3.7904553413391113,
736
- "logps/chosen": -4891.21484375,
737
- "logps/rejected": -4621.8271484375,
738
- "loss": 0.0209,
739
- "pred_label": 5531.77490234375,
740
- "rewards/accuracies": 0.4124999940395355,
741
- "rewards/chosen": -466.149658203125,
742
- "rewards/margins": -26.93206787109375,
743
- "rewards/rejected": -439.21759033203125,
744
  "step": 450,
745
- "use_label": 1590.2249755859375
746
  },
747
  {
748
  "epoch": 0.48,
749
  "learning_rate": 2.881257275902212e-05,
750
- "logits/chosen": -3.8137125968933105,
751
- "logits/rejected": -3.8143749237060547,
752
- "logps/chosen": -6517.14404296875,
753
- "logps/rejected": -5308.48095703125,
754
- "loss": 0.0172,
755
- "pred_label": 5688.375,
756
- "rewards/accuracies": 0.39375001192092896,
757
- "rewards/chosen": -621.5343017578125,
758
- "rewards/margins": -117.5860595703125,
759
- "rewards/rejected": -503.9481506347656,
760
  "step": 460,
761
- "use_label": 1593.625
762
  },
763
  {
764
  "epoch": 0.49,
765
  "learning_rate": 2.8230500582072178e-05,
766
- "logits/chosen": -3.7992587089538574,
767
- "logits/rejected": -3.799516201019287,
768
- "logps/chosen": -5745.47314453125,
769
- "logps/rejected": -5189.96923828125,
770
- "loss": 0.0155,
771
- "pred_label": 5845.52490234375,
772
- "rewards/accuracies": 0.44999998807907104,
773
- "rewards/chosen": -547.8372802734375,
774
- "rewards/margins": -54.52460861206055,
775
- "rewards/rejected": -493.3126525878906,
776
  "step": 470,
777
- "use_label": 1596.4749755859375
778
  },
779
  {
780
  "epoch": 0.5,
781
  "learning_rate": 2.7648428405122233e-05,
782
- "logits/chosen": -3.761199951171875,
783
- "logits/rejected": -3.7633252143859863,
784
- "logps/chosen": -5170.09765625,
785
- "logps/rejected": -5077.68310546875,
786
- "loss": 0.0168,
787
- "pred_label": 6002.375,
788
- "rewards/accuracies": 0.4625000059604645,
789
- "rewards/chosen": -492.452392578125,
790
- "rewards/margins": -9.535995483398438,
791
- "rewards/rejected": -482.9164123535156,
792
  "step": 480,
793
- "use_label": 1599.625
794
  },
795
  {
796
  "epoch": 0.51,
797
  "learning_rate": 2.7066356228172297e-05,
798
- "logits/chosen": -3.7587084770202637,
799
- "logits/rejected": -3.758279323577881,
800
- "logps/chosen": -5773.9345703125,
801
- "logps/rejected": -4788.09765625,
802
- "loss": 0.0171,
803
- "pred_label": 6158.6748046875,
804
- "rewards/accuracies": 0.4124999940395355,
805
- "rewards/chosen": -550.6905517578125,
806
- "rewards/margins": -95.62019348144531,
807
- "rewards/rejected": -455.0704040527344,
808
  "step": 490,
809
- "use_label": 1603.324951171875
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 2.6484284051222352e-05,
814
- "logits/chosen": -3.767758846282959,
815
- "logits/rejected": -3.7685482501983643,
816
- "logps/chosen": -6388.5419921875,
817
- "logps/rejected": -5069.38916015625,
818
- "loss": 0.0222,
819
- "pred_label": 6314.52490234375,
820
- "rewards/accuracies": 0.375,
821
- "rewards/chosen": -609.9085693359375,
822
- "rewards/margins": -128.79580688476562,
823
- "rewards/rejected": -481.1127014160156,
824
  "step": 500,
825
- "use_label": 1607.4749755859375
826
  },
827
  {
828
  "epoch": 0.53,
829
  "learning_rate": 2.590221187427241e-05,
830
- "logits/chosen": -3.7820258140563965,
831
- "logits/rejected": -3.784348964691162,
832
- "logps/chosen": -5971.962890625,
833
- "logps/rejected": -4760.34912109375,
834
- "loss": 0.0301,
835
- "pred_label": 6469.9501953125,
836
- "rewards/accuracies": 0.42500001192092896,
837
- "rewards/chosen": -569.66259765625,
838
- "rewards/margins": -116.97715759277344,
839
- "rewards/rejected": -452.6853942871094,
840
  "step": 510,
841
- "use_label": 1612.050048828125
842
  },
843
  {
844
  "epoch": 0.54,
845
  "learning_rate": 2.532013969732247e-05,
846
- "logits/chosen": -3.718219041824341,
847
- "logits/rejected": -3.72932767868042,
848
- "logps/chosen": -6069.69580078125,
849
- "logps/rejected": -5217.16015625,
850
- "loss": 0.0223,
851
- "pred_label": 6623.9248046875,
852
- "rewards/accuracies": 0.41874998807907104,
853
- "rewards/chosen": -577.7742309570312,
854
- "rewards/margins": -83.02960205078125,
855
- "rewards/rejected": -494.74456787109375,
856
  "step": 520,
857
- "use_label": 1618.074951171875
858
  },
859
  {
860
  "epoch": 0.55,
861
  "learning_rate": 2.4738067520372525e-05,
862
- "logits/chosen": -3.7202675342559814,
863
- "logits/rejected": -3.7229580879211426,
864
- "logps/chosen": -6532.5537109375,
865
- "logps/rejected": -5770.68359375,
866
- "loss": 0.0095,
867
- "pred_label": 6780.8251953125,
868
- "rewards/accuracies": 0.4000000059604645,
869
- "rewards/chosen": -623.2237548828125,
870
- "rewards/margins": -75.55280303955078,
871
- "rewards/rejected": -547.6709594726562,
872
  "step": 530,
873
- "use_label": 1621.175048828125
874
  },
875
  {
876
  "epoch": 0.57,
877
  "learning_rate": 2.4155995343422587e-05,
878
- "logits/chosen": -3.759662628173828,
879
- "logits/rejected": -3.7599411010742188,
880
- "logps/chosen": -6315.06787109375,
881
- "logps/rejected": -5507.916015625,
882
- "loss": 0.01,
883
- "pred_label": 6938.4501953125,
884
- "rewards/accuracies": 0.41874998807907104,
885
- "rewards/chosen": -602.653076171875,
886
- "rewards/margins": -78.45845031738281,
887
- "rewards/rejected": -524.1947021484375,
888
  "step": 540,
889
- "use_label": 1623.550048828125
890
  },
891
  {
892
  "epoch": 0.58,
893
  "learning_rate": 2.3573923166472644e-05,
894
- "logits/chosen": -3.738492488861084,
895
- "logits/rejected": -3.7378597259521484,
896
- "logps/chosen": -5971.4853515625,
897
- "logps/rejected": -5198.08935546875,
898
- "loss": 0.0129,
899
- "pred_label": 7093.9501953125,
900
- "rewards/accuracies": 0.4000000059604645,
901
- "rewards/chosen": -568.2282104492188,
902
- "rewards/margins": -74.6135025024414,
903
- "rewards/rejected": -493.61468505859375,
904
  "step": 550,
905
- "use_label": 1628.050048828125
906
  },
907
  {
908
  "epoch": 0.59,
909
  "learning_rate": 2.2991850989522702e-05,
910
- "logits/chosen": -3.794232130050659,
911
- "logits/rejected": -3.793727397918701,
912
- "logps/chosen": -5239.75048828125,
913
- "logps/rejected": -4281.4697265625,
914
- "loss": 0.0136,
915
- "pred_label": 7250.25,
916
- "rewards/accuracies": 0.41874998807907104,
917
- "rewards/chosen": -496.53460693359375,
918
- "rewards/margins": -90.13624572753906,
919
- "rewards/rejected": -406.3983459472656,
920
  "step": 560,
921
- "use_label": 1631.75
922
  },
923
  {
924
  "epoch": 0.6,
925
  "learning_rate": 2.240977881257276e-05,
926
- "logits/chosen": -3.7495296001434326,
927
- "logits/rejected": -3.7504706382751465,
928
- "logps/chosen": -6018.4404296875,
929
- "logps/rejected": -5286.20751953125,
930
- "loss": 0.0237,
931
- "pred_label": 7407.02490234375,
932
- "rewards/accuracies": 0.4124999940395355,
933
- "rewards/chosen": -574.8952026367188,
934
- "rewards/margins": -72.47772979736328,
935
- "rewards/rejected": -502.41748046875,
936
  "step": 570,
937
- "use_label": 1634.9749755859375
938
  },
939
  {
940
  "epoch": 0.61,
941
  "learning_rate": 2.1827706635622818e-05,
942
- "logits/chosen": -3.7940216064453125,
943
- "logits/rejected": -3.794236421585083,
944
- "logps/chosen": -5965.88134765625,
945
- "logps/rejected": -4998.4501953125,
946
- "loss": 0.0206,
947
- "pred_label": 7561.5498046875,
948
- "rewards/accuracies": 0.3812499940395355,
949
- "rewards/chosen": -568.77734375,
950
- "rewards/margins": -95.3790054321289,
951
- "rewards/rejected": -473.3983459472656,
952
  "step": 580,
953
- "use_label": 1640.449951171875
954
  },
955
  {
956
  "epoch": 0.62,
957
  "learning_rate": 2.124563445867288e-05,
958
- "logits/chosen": -3.725088119506836,
959
- "logits/rejected": -3.7297370433807373,
960
- "logps/chosen": -5610.734375,
961
- "logps/rejected": -5206.3388671875,
962
- "loss": 0.0217,
963
- "pred_label": 7717.9248046875,
964
- "rewards/accuracies": 0.4312500059604645,
965
- "rewards/chosen": -533.9136962890625,
966
- "rewards/margins": -41.3577766418457,
967
- "rewards/rejected": -492.555908203125,
968
  "step": 590,
969
- "use_label": 1644.074951171875
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 2.0663562281722934e-05,
974
- "logits/chosen": -2.4542346000671387,
975
- "logits/rejected": -2.457996129989624,
976
- "logps/chosen": -5316.2861328125,
977
- "logps/rejected": -4824.51171875,
978
- "loss": 0.0176,
979
- "pred_label": 7873.5498046875,
980
- "rewards/accuracies": 0.4749999940395355,
981
- "rewards/chosen": -500.2923889160156,
982
- "rewards/margins": -48.11725616455078,
983
- "rewards/rejected": -452.1751403808594,
984
  "step": 600,
985
- "use_label": 1648.449951171875
986
  },
987
  {
988
  "epoch": 0.64,
989
  "learning_rate": 2.0081490104772992e-05,
990
- "logits/chosen": 1.6535043716430664,
991
- "logits/rejected": 1.6919664144515991,
992
- "logps/chosen": -4125.20458984375,
993
- "logps/rejected": -3309.930419921875,
994
- "loss": 0.019,
995
- "pred_label": 8029.0,
996
- "rewards/accuracies": 0.3687500059604645,
997
- "rewards/chosen": -381.9295349121094,
998
- "rewards/margins": -74.89913177490234,
999
- "rewards/rejected": -307.0304260253906,
1000
  "step": 610,
1001
- "use_label": 1653.0
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
  "learning_rate": 1.9499417927823053e-05,
1006
- "logits/chosen": 3.7263665199279785,
1007
- "logits/rejected": 3.714616298675537,
1008
- "logps/chosen": -5211.14453125,
1009
- "logps/rejected": -4633.3828125,
1010
- "loss": 0.0148,
1011
- "pred_label": 8184.77490234375,
1012
- "rewards/accuracies": 0.4375,
1013
- "rewards/chosen": -493.1163024902344,
1014
- "rewards/margins": -54.99699783325195,
1015
- "rewards/rejected": -438.11932373046875,
1016
  "step": 620,
1017
- "use_label": 1657.2249755859375
1018
  },
1019
  {
1020
  "epoch": 0.66,
1021
  "learning_rate": 1.8917345750873107e-05,
1022
- "logits/chosen": 7.3053741455078125,
1023
- "logits/rejected": 7.303783416748047,
1024
- "logps/chosen": -7381.1630859375,
1025
- "logps/rejected": -6444.02734375,
1026
- "loss": 0.009,
1027
- "pred_label": 8343.1748046875,
1028
- "rewards/accuracies": 0.41874998807907104,
1029
- "rewards/chosen": -709.6043701171875,
1030
- "rewards/margins": -91.3189697265625,
1031
- "rewards/rejected": -618.2854614257812,
1032
  "step": 630,
1033
- "use_label": 1658.824951171875
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
  "learning_rate": 1.833527357392317e-05,
1038
- "logits/chosen": 8.230302810668945,
1039
- "logits/rejected": 8.22825813293457,
1040
- "logps/chosen": -7595.42724609375,
1041
- "logps/rejected": -7036.8515625,
1042
- "loss": 0.0111,
1043
- "pred_label": 8500.5,
1044
- "rewards/accuracies": 0.4749999940395355,
1045
- "rewards/chosen": -734.2236328125,
1046
- "rewards/margins": -55.39581298828125,
1047
- "rewards/rejected": -678.8277587890625,
1048
  "step": 640,
1049
- "use_label": 1661.5
1050
  },
1051
  {
1052
  "epoch": 0.68,
1053
  "learning_rate": 1.7753201396973227e-05,
1054
- "logits/chosen": 8.20081901550293,
1055
- "logits/rejected": 8.195457458496094,
1056
- "logps/chosen": -9194.9013671875,
1057
- "logps/rejected": -7898.6552734375,
1058
- "loss": 0.0088,
1059
- "pred_label": 8658.349609375,
1060
- "rewards/accuracies": 0.4437499940395355,
1061
- "rewards/chosen": -889.2952880859375,
1062
- "rewards/margins": -127.2280044555664,
1063
- "rewards/rejected": -762.0673217773438,
1064
  "step": 650,
1065
- "use_label": 1663.6500244140625
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
  "learning_rate": 1.717112922002328e-05,
1070
- "logits/chosen": 9.882159233093262,
1071
- "logits/rejected": 9.892133712768555,
1072
- "logps/chosen": -10026.548828125,
1073
- "logps/rejected": -8868.25,
1074
- "loss": 0.0147,
1075
- "pred_label": 8817.3251953125,
1076
- "rewards/accuracies": 0.4312500059604645,
1077
- "rewards/chosen": -973.1184692382812,
1078
- "rewards/margins": -111.28104400634766,
1079
- "rewards/rejected": -861.83740234375,
1080
  "step": 660,
1081
- "use_label": 1664.675048828125
1082
  },
1083
  {
1084
  "epoch": 0.7,
1085
  "learning_rate": 1.6589057043073342e-05,
1086
- "logits/chosen": 11.399931907653809,
1087
- "logits/rejected": 11.406278610229492,
1088
- "logps/chosen": -11008.333984375,
1089
- "logps/rejected": -9124.1875,
1090
- "loss": 0.0161,
1091
- "pred_label": 8974.8251953125,
1092
- "rewards/accuracies": 0.44999998807907104,
1093
- "rewards/chosen": -1072.7261962890625,
1094
- "rewards/margins": -184.44720458984375,
1095
- "rewards/rejected": -888.2789916992188,
1096
  "step": 670,
1097
- "use_label": 1667.175048828125
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
  "learning_rate": 1.60069848661234e-05,
1102
- "logits/chosen": 9.982951164245605,
1103
- "logits/rejected": 9.928037643432617,
1104
- "logps/chosen": -10043.669921875,
1105
- "logps/rejected": -9005.763671875,
1106
- "loss": 0.0146,
1107
- "pred_label": 9133.150390625,
1108
- "rewards/accuracies": 0.4749999940395355,
1109
- "rewards/chosen": -975.6735229492188,
1110
- "rewards/margins": -100.92012023925781,
1111
- "rewards/rejected": -874.75341796875,
1112
  "step": 680,
1113
- "use_label": 1668.8499755859375
1114
  },
1115
  {
1116
  "epoch": 0.72,
1117
  "learning_rate": 1.5424912689173458e-05,
1118
- "logits/chosen": 3.900209903717041,
1119
- "logits/rejected": 3.7533345222473145,
1120
- "logps/chosen": -5247.2783203125,
1121
- "logps/rejected": -4165.42138671875,
1122
- "loss": 0.015,
1123
- "pred_label": 9290.625,
1124
- "rewards/accuracies": 0.39375001192092896,
1125
- "rewards/chosen": -499.68951416015625,
1126
- "rewards/margins": -104.0757827758789,
1127
- "rewards/rejected": -395.61370849609375,
1128
  "step": 690,
1129
- "use_label": 1671.375
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 1.4842840512223516e-05,
1134
- "logits/chosen": 3.9459800720214844,
1135
- "logits/rejected": 3.741647243499756,
1136
- "logps/chosen": -6615.76708984375,
1137
- "logps/rejected": -5040.81982421875,
1138
- "loss": 0.0158,
1139
- "pred_label": 9445.275390625,
1140
- "rewards/accuracies": 0.40625,
1141
- "rewards/chosen": -634.0076904296875,
1142
- "rewards/margins": -154.7536163330078,
1143
- "rewards/rejected": -479.25408935546875,
1144
  "step": 700,
1145
- "use_label": 1676.7249755859375
1146
  },
1147
  {
1148
  "epoch": 0.74,
1149
  "learning_rate": 1.4260768335273575e-05,
1150
- "logits/chosen": 5.425192832946777,
1151
- "logits/rejected": 5.073692321777344,
1152
- "logps/chosen": -8362.833984375,
1153
- "logps/rejected": -6741.9013671875,
1154
- "loss": 0.0127,
1155
- "pred_label": 9602.0498046875,
1156
- "rewards/accuracies": 0.38749998807907104,
1157
- "rewards/chosen": -807.3259887695312,
1158
- "rewards/margins": -157.26266479492188,
1159
- "rewards/rejected": -650.0633544921875,
1160
  "step": 710,
1161
- "use_label": 1679.949951171875
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
  "learning_rate": 1.3678696158323633e-05,
1166
- "logits/chosen": 10.10822582244873,
1167
- "logits/rejected": 10.002889633178711,
1168
- "logps/chosen": -10245.421875,
1169
- "logps/rejected": -9104.876953125,
1170
- "loss": 0.023,
1171
- "pred_label": 9759.0751953125,
1172
- "rewards/accuracies": 0.4749999940395355,
1173
- "rewards/chosen": -997.3968505859375,
1174
- "rewards/margins": -112.24949645996094,
1175
- "rewards/rejected": -885.1474609375,
1176
  "step": 720,
1177
- "use_label": 1682.925048828125
1178
  },
1179
  {
1180
  "epoch": 0.76,
1181
  "learning_rate": 1.309662398137369e-05,
1182
- "logits/chosen": 10.97143840789795,
1183
- "logits/rejected": 10.992796897888184,
1184
- "logps/chosen": -10079.0634765625,
1185
- "logps/rejected": -8320.7255859375,
1186
- "loss": 0.0134,
1187
- "pred_label": 9917.2001953125,
1188
- "rewards/accuracies": 0.4000000059604645,
1189
- "rewards/chosen": -978.1613159179688,
1190
- "rewards/margins": -170.25567626953125,
1191
- "rewards/rejected": -807.9056396484375,
1192
  "step": 730,
1193
- "use_label": 1684.800048828125
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
  "learning_rate": 1.2514551804423749e-05,
1198
- "logits/chosen": 12.233144760131836,
1199
- "logits/rejected": 12.248846054077148,
1200
- "logps/chosen": -12818.298828125,
1201
- "logps/rejected": -11287.875,
1202
- "loss": 0.0048,
1203
- "pred_label": 10076.650390625,
1204
- "rewards/accuracies": 0.45625001192092896,
1205
- "rewards/chosen": -1251.0927734375,
1206
- "rewards/margins": -150.9541473388672,
1207
- "rewards/rejected": -1100.138671875,
1208
  "step": 740,
1209
- "use_label": 1685.3499755859375
1210
  },
1211
  {
1212
  "epoch": 0.79,
1213
  "learning_rate": 1.1932479627473807e-05,
1214
- "logits/chosen": 12.499679565429688,
1215
- "logits/rejected": 12.485097885131836,
1216
- "logps/chosen": -11923.8232421875,
1217
- "logps/rejected": -10479.5771484375,
1218
- "loss": 0.0074,
1219
- "pred_label": 10235.875,
1220
- "rewards/accuracies": 0.4312500059604645,
1221
- "rewards/chosen": -1166.3336181640625,
1222
- "rewards/margins": -144.428466796875,
1223
- "rewards/rejected": -1021.9050903320312,
1224
  "step": 750,
1225
- "use_label": 1686.125
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
  "learning_rate": 1.1350407450523866e-05,
1230
- "logits/chosen": 7.414717197418213,
1231
- "logits/rejected": 7.40515661239624,
1232
- "logps/chosen": -9329.333984375,
1233
- "logps/rejected": -8092.07177734375,
1234
- "loss": 0.0077,
1235
- "pred_label": 10393.625,
1236
- "rewards/accuracies": 0.44999998807907104,
1237
- "rewards/chosen": -903.3870849609375,
1238
- "rewards/margins": -120.39128112792969,
1239
- "rewards/rejected": -782.995849609375,
1240
  "step": 760,
1241
- "use_label": 1688.375
1242
  },
1243
  {
1244
  "epoch": 0.81,
1245
  "learning_rate": 1.0768335273573923e-05,
1246
- "logits/chosen": 3.0171780586242676,
1247
- "logits/rejected": 2.9968318939208984,
1248
- "logps/chosen": -6287.14453125,
1249
- "logps/rejected": -5580.78515625,
1250
- "loss": 0.0133,
1251
- "pred_label": 10549.275390625,
1252
- "rewards/accuracies": 0.48124998807907104,
1253
- "rewards/chosen": -598.4849243164062,
1254
- "rewards/margins": -68.8787841796875,
1255
- "rewards/rejected": -529.6060791015625,
1256
  "step": 770,
1257
- "use_label": 1692.7249755859375
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
  "learning_rate": 1.0186263096623982e-05,
1262
- "logits/chosen": -1.7731034755706787,
1263
- "logits/rejected": -1.784906029701233,
1264
- "logps/chosen": -4869.2548828125,
1265
- "logps/rejected": -4165.048828125,
1266
- "loss": 0.0135,
1267
- "pred_label": 10705.75,
1268
- "rewards/accuracies": 0.42500001192092896,
1269
- "rewards/chosen": -458.80615234375,
1270
- "rewards/margins": -67.01484680175781,
1271
- "rewards/rejected": -391.7913513183594,
1272
  "step": 780,
1273
- "use_label": 1696.25
1274
  },
1275
  {
1276
  "epoch": 0.83,
1277
  "learning_rate": 9.60419091967404e-06,
1278
- "logits/chosen": -0.7930339574813843,
1279
- "logits/rejected": -0.8520814180374146,
1280
- "logps/chosen": -4772.41162109375,
1281
- "logps/rejected": -4426.7998046875,
1282
- "loss": 0.0181,
1283
- "pred_label": 10861.849609375,
1284
- "rewards/accuracies": 0.42500001192092896,
1285
- "rewards/chosen": -450.46026611328125,
1286
- "rewards/margins": -33.853233337402344,
1287
- "rewards/rejected": -416.60699462890625,
1288
  "step": 790,
1289
- "use_label": 1700.1500244140625
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 9.022118742724098e-06,
1294
- "logits/chosen": -2.1026828289031982,
1295
- "logits/rejected": -2.1392974853515625,
1296
- "logps/chosen": -5048.7392578125,
1297
- "logps/rejected": -4407.5849609375,
1298
- "loss": 0.0168,
1299
- "pred_label": 11020.3251953125,
1300
- "rewards/accuracies": 0.4625000059604645,
1301
- "rewards/chosen": -473.4267578125,
1302
- "rewards/margins": -60.274497985839844,
1303
- "rewards/rejected": -413.1521911621094,
1304
  "step": 800,
1305
- "use_label": 1701.675048828125
1306
  },
1307
  {
1308
  "epoch": 0.85,
1309
  "learning_rate": 8.440046565774158e-06,
1310
- "logits/chosen": -1.4834654331207275,
1311
- "logits/rejected": -1.5466824769973755,
1312
- "logps/chosen": -3907.274169921875,
1313
- "logps/rejected": -3107.385986328125,
1314
- "loss": 0.014,
1315
- "pred_label": 11177.2001953125,
1316
- "rewards/accuracies": 0.3812499940395355,
1317
- "rewards/chosen": -362.62506103515625,
1318
- "rewards/margins": -74.76776885986328,
1319
- "rewards/rejected": -287.8572692871094,
1320
  "step": 810,
1321
- "use_label": 1704.800048828125
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
  "learning_rate": 7.857974388824214e-06,
1326
- "logits/chosen": -0.9667215347290039,
1327
- "logits/rejected": -1.0632926225662231,
1328
- "logps/chosen": -3960.51708984375,
1329
- "logps/rejected": -3172.82275390625,
1330
- "loss": 0.0275,
1331
- "pred_label": 11334.25,
1332
- "rewards/accuracies": 0.4000000059604645,
1333
- "rewards/chosen": -366.71990966796875,
1334
- "rewards/margins": -73.59380340576172,
1335
- "rewards/rejected": -293.1261291503906,
1336
  "step": 820,
1337
- "use_label": 1707.75
1338
  },
1339
  {
1340
  "epoch": 0.87,
1341
  "learning_rate": 7.275902211874273e-06,
1342
- "logits/chosen": 3.487344264984131,
1343
- "logits/rejected": 3.3718509674072266,
1344
- "logps/chosen": -5937.5302734375,
1345
- "logps/rejected": -6147.76416015625,
1346
- "loss": 0.014,
1347
- "pred_label": 11490.599609375,
1348
- "rewards/accuracies": 0.5625,
1349
- "rewards/chosen": -567.4717407226562,
1350
- "rewards/margins": 19.284542083740234,
1351
- "rewards/rejected": -586.7562255859375,
1352
  "step": 830,
1353
- "use_label": 1711.4000244140625
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
  "learning_rate": 6.693830034924331e-06,
1358
- "logits/chosen": 11.813470840454102,
1359
- "logits/rejected": 11.792594909667969,
1360
- "logps/chosen": -11349.767578125,
1361
- "logps/rejected": -10712.0576171875,
1362
- "loss": 0.01,
1363
- "pred_label": 11647.599609375,
1364
- "rewards/accuracies": 0.4749999940395355,
1365
- "rewards/chosen": -1108.175048828125,
1366
- "rewards/margins": -63.417640686035156,
1367
- "rewards/rejected": -1044.7574462890625,
1368
  "step": 840,
1369
- "use_label": 1714.4000244140625
1370
  },
1371
  {
1372
  "epoch": 0.89,
1373
  "learning_rate": 6.111757857974389e-06,
1374
- "logits/chosen": 12.407671928405762,
1375
- "logits/rejected": 12.412581443786621,
1376
- "logps/chosen": -12044.3359375,
1377
- "logps/rejected": -10440.21875,
1378
- "loss": 0.0137,
1379
- "pred_label": 11806.0498046875,
1380
- "rewards/accuracies": 0.41874998807907104,
1381
- "rewards/chosen": -1174.930908203125,
1382
- "rewards/margins": -155.24510192871094,
1383
- "rewards/rejected": -1019.6856689453125,
1384
  "step": 850,
1385
- "use_label": 1715.949951171875
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
  "learning_rate": 5.529685681024447e-06,
1390
- "logits/chosen": 13.048141479492188,
1391
- "logits/rejected": 13.045463562011719,
1392
- "logps/chosen": -12283.849609375,
1393
- "logps/rejected": -11249.0595703125,
1394
- "loss": 0.0112,
1395
- "pred_label": 11965.0,
1396
- "rewards/accuracies": 0.4437499940395355,
1397
- "rewards/chosen": -1199.896728515625,
1398
- "rewards/margins": -102.23991394042969,
1399
- "rewards/rejected": -1097.6568603515625,
1400
  "step": 860,
1401
- "use_label": 1717.0
1402
  },
1403
  {
1404
  "epoch": 0.91,
1405
  "learning_rate": 4.947613504074506e-06,
1406
- "logits/chosen": 13.306634902954102,
1407
- "logits/rejected": 13.326390266418457,
1408
- "logps/chosen": -10968.7822265625,
1409
- "logps/rejected": -10457.6435546875,
1410
- "loss": 0.0108,
1411
- "pred_label": 12123.5751953125,
1412
- "rewards/accuracies": 0.543749988079071,
1413
- "rewards/chosen": -1072.5794677734375,
1414
- "rewards/margins": -50.84003448486328,
1415
- "rewards/rejected": -1021.7394409179688,
1416
  "step": 870,
1417
- "use_label": 1718.425048828125
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
  "learning_rate": 4.3655413271245635e-06,
1422
- "logits/chosen": 13.300872802734375,
1423
- "logits/rejected": 13.316276550292969,
1424
- "logps/chosen": -13030.8095703125,
1425
- "logps/rejected": -11216.4794921875,
1426
- "loss": 0.0078,
1427
- "pred_label": 12279.9501953125,
1428
- "rewards/accuracies": 0.4749999940395355,
1429
- "rewards/chosen": -1274.422119140625,
1430
- "rewards/margins": -178.9391632080078,
1431
- "rewards/rejected": -1095.4830322265625,
1432
  "step": 880,
1433
- "use_label": 1722.050048828125
1434
  },
1435
  {
1436
  "epoch": 0.93,
1437
  "learning_rate": 3.7834691501746217e-06,
1438
- "logits/chosen": 13.323100090026855,
1439
- "logits/rejected": 13.341893196105957,
1440
- "logps/chosen": -13646.083984375,
1441
- "logps/rejected": -12056.134765625,
1442
- "loss": 0.0108,
1443
- "pred_label": 12438.625,
1444
- "rewards/accuracies": 0.4124999940395355,
1445
- "rewards/chosen": -1336.4798583984375,
1446
- "rewards/margins": -158.06932067871094,
1447
- "rewards/rejected": -1178.41064453125,
1448
  "step": 890,
1449
- "use_label": 1723.375
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 3.2013969732246805e-06,
1454
- "logits/chosen": 13.762173652648926,
1455
- "logits/rejected": 13.755559921264648,
1456
- "logps/chosen": -13121.990234375,
1457
- "logps/rejected": -10966.107421875,
1458
- "loss": 0.0193,
1459
- "pred_label": 12596.1748046875,
1460
- "rewards/accuracies": 0.45625001192092896,
1461
- "rewards/chosen": -1284.2850341796875,
1462
- "rewards/margins": -213.5261688232422,
1463
- "rewards/rejected": -1070.7589111328125,
1464
  "step": 900,
1465
- "use_label": 1725.824951171875
1466
  },
1467
  {
1468
  "epoch": 0.95,
1469
  "learning_rate": 2.6193247962747383e-06,
1470
- "logits/chosen": 13.810602188110352,
1471
- "logits/rejected": 13.802284240722656,
1472
- "logps/chosen": -13679.8857421875,
1473
- "logps/rejected": -11569.97265625,
1474
- "loss": 0.0091,
1475
- "pred_label": 12752.150390625,
1476
- "rewards/accuracies": 0.4312500059604645,
1477
- "rewards/chosen": -1338.3470458984375,
1478
- "rewards/margins": -207.6631317138672,
1479
- "rewards/rejected": -1130.6839599609375,
1480
  "step": 910,
1481
- "use_label": 1729.8499755859375
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
  "learning_rate": 2.037252619324796e-06,
1486
- "logits/chosen": 13.879419326782227,
1487
- "logits/rejected": 13.855003356933594,
1488
- "logps/chosen": -14082.59375,
1489
- "logps/rejected": -11603.6435546875,
1490
- "loss": 0.0145,
1491
- "pred_label": 12909.7998046875,
1492
- "rewards/accuracies": 0.3687500059604645,
1493
- "rewards/chosen": -1378.5706787109375,
1494
- "rewards/margins": -244.1646270751953,
1495
- "rewards/rejected": -1134.406005859375,
1496
  "step": 920,
1497
- "use_label": 1732.199951171875
1498
  },
1499
  {
1500
  "epoch": 0.97,
1501
  "learning_rate": 1.4551804423748545e-06,
1502
- "logits/chosen": 13.563482284545898,
1503
- "logits/rejected": 13.548616409301758,
1504
- "logps/chosen": -13257.4296875,
1505
- "logps/rejected": -10279.7294921875,
1506
- "loss": 0.0103,
1507
- "pred_label": 13067.3251953125,
1508
- "rewards/accuracies": 0.45625001192092896,
1509
- "rewards/chosen": -1298.921630859375,
1510
- "rewards/margins": -295.1283264160156,
1511
- "rewards/rejected": -1003.79345703125,
1512
  "step": 930,
1513
- "use_label": 1734.675048828125
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
  "learning_rate": 8.731082654249127e-07,
1518
- "logits/chosen": 13.876431465148926,
1519
- "logits/rejected": 13.86772632598877,
1520
- "logps/chosen": -14265.5234375,
1521
- "logps/rejected": -11806.12890625,
1522
- "loss": 0.0096,
1523
- "pred_label": 13226.525390625,
1524
- "rewards/accuracies": 0.3812499940395355,
1525
- "rewards/chosen": -1397.813232421875,
1526
- "rewards/margins": -243.88876342773438,
1527
- "rewards/rejected": -1153.92431640625,
1528
  "step": 940,
1529
- "use_label": 1735.4749755859375
1530
  },
1531
  {
1532
  "epoch": 0.99,
1533
  "learning_rate": 2.910360884749709e-07,
1534
- "logits/chosen": 13.791536331176758,
1535
- "logits/rejected": 13.786079406738281,
1536
- "logps/chosen": -12623.412109375,
1537
- "logps/rejected": -11098.677734375,
1538
- "loss": 0.0144,
1539
- "pred_label": 13384.7998046875,
1540
- "rewards/accuracies": 0.4749999940395355,
1541
- "rewards/chosen": -1235.495849609375,
1542
- "rewards/margins": -150.9226837158203,
1543
- "rewards/rejected": -1084.572998046875,
1544
  "step": 950,
1545
- "use_label": 1737.199951171875
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
- "eval_logits/chosen": 13.824411392211914,
1550
- "eval_logits/rejected": 13.813151359558105,
1551
- "eval_logps/chosen": -13722.0166015625,
1552
- "eval_logps/rejected": -11596.5400390625,
1553
- "eval_loss": 0.011624496430158615,
1554
- "eval_pred_label": 13789.83984375,
1555
- "eval_rewards/accuracies": 0.4740000069141388,
1556
- "eval_rewards/chosen": -1343.776123046875,
1557
- "eval_rewards/margins": -210.05210876464844,
1558
- "eval_rewards/rejected": -1133.72412109375,
1559
- "eval_runtime": 454.9033,
1560
- "eval_samples_per_second": 4.397,
1561
- "eval_steps_per_second": 0.275,
1562
- "eval_use_label": 1742.1600341796875,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
- "train_loss": 0.08065580570807007,
1570
- "train_runtime": 25294.0492,
1571
- "train_samples_per_second": 2.417,
1572
  "train_steps_per_second": 0.038
1573
  }
1574
  ],
 
75
  {
76
  "epoch": 0.04,
77
  "learning_rate": 2.0833333333333336e-05,
78
+ "logits/chosen": -2.841322422027588,
79
+ "logits/rejected": -2.84962797164917,
80
+ "logps/chosen": -281.39862060546875,
81
+ "logps/rejected": -277.9919738769531,
82
+ "loss": 0.6236,
83
+ "pred_label": 0.875,
84
+ "rewards/accuracies": 0.6812499761581421,
85
+ "rewards/chosen": 0.01947467401623726,
86
+ "rewards/margins": 0.21414700150489807,
87
+ "rewards/rejected": -0.19467231631278992,
88
  "step": 40,
89
+ "use_label": 561.125
90
  },
91
  {
92
  "epoch": 0.05,
93
  "learning_rate": 2.604166666666667e-05,
94
+ "logits/chosen": -2.8523213863372803,
95
+ "logits/rejected": -2.836536169052124,
96
+ "logps/chosen": -267.1900329589844,
97
+ "logps/rejected": -263.0260925292969,
98
+ "loss": 0.5468,
99
+ "pred_label": 18.174999237060547,
100
+ "rewards/accuracies": 0.637499988079071,
101
+ "rewards/chosen": -0.10816816240549088,
102
+ "rewards/margins": 0.4027964472770691,
103
+ "rewards/rejected": -0.5109646320343018,
104
  "step": 50,
105
+ "use_label": 703.8250122070312
106
  },
107
  {
108
  "epoch": 0.06,
109
  "learning_rate": 3.125e-05,
110
+ "logits/chosen": -2.8082404136657715,
111
+ "logits/rejected": -2.7957923412323,
112
+ "logps/chosen": -302.7262878417969,
113
+ "logps/rejected": -294.58624267578125,
114
+ "loss": 0.4674,
115
+ "pred_label": 66.9000015258789,
116
+ "rewards/accuracies": 0.706250011920929,
117
+ "rewards/chosen": -0.2311042845249176,
118
+ "rewards/margins": 0.616974413394928,
119
+ "rewards/rejected": -0.8480786085128784,
120
  "step": 60,
121
+ "use_label": 815.0999755859375
122
  },
123
  {
124
  "epoch": 0.07,
125
  "learning_rate": 3.6458333333333336e-05,
126
+ "logits/chosen": -2.8012547492980957,
127
+ "logits/rejected": -2.788729429244995,
128
+ "logps/chosen": -299.8072204589844,
129
+ "logps/rejected": -288.31036376953125,
130
+ "loss": 0.3802,
131
+ "pred_label": 144.4499969482422,
132
+ "rewards/accuracies": 0.737500011920929,
133
+ "rewards/chosen": -0.5493733286857605,
134
+ "rewards/margins": 0.9851129651069641,
135
+ "rewards/rejected": -1.534486174583435,
136
  "step": 70,
137
+ "use_label": 897.5499877929688
138
  },
139
  {
140
  "epoch": 0.08,
141
  "learning_rate": 4.166666666666667e-05,
142
+ "logits/chosen": -2.737917423248291,
143
+ "logits/rejected": -2.7173783779144287,
144
+ "logps/chosen": -318.4990234375,
145
+ "logps/rejected": -307.38311767578125,
146
+ "loss": 0.2535,
147
+ "pred_label": 244.1999969482422,
148
+ "rewards/accuracies": 0.7124999761581421,
149
+ "rewards/chosen": -1.3193671703338623,
150
+ "rewards/margins": 1.5176814794540405,
151
+ "rewards/rejected": -2.8370487689971924,
152
  "step": 80,
153
+ "use_label": 957.7999877929688
154
  },
155
  {
156
  "epoch": 0.09,
157
  "learning_rate": 4.6875e-05,
158
+ "logits/chosen": -2.650569200515747,
159
+ "logits/rejected": -2.7028753757476807,
160
+ "logps/chosen": -316.5365295410156,
161
+ "logps/rejected": -307.62799072265625,
162
+ "loss": 0.2342,
163
+ "pred_label": 363.79998779296875,
164
+ "rewards/accuracies": 0.7250000238418579,
165
+ "rewards/chosen": -2.0753350257873535,
166
+ "rewards/margins": 2.060016632080078,
167
+ "rewards/rejected": -4.135351657867432,
168
  "step": 90,
169
+ "use_label": 998.2000122070312
170
  },
171
  {
172
  "epoch": 0.1,
173
  "learning_rate": 4.976717112922003e-05,
174
+ "logits/chosen": -2.664170265197754,
175
+ "logits/rejected": -2.6609156131744385,
176
+ "logps/chosen": -301.24493408203125,
177
+ "logps/rejected": -345.8691101074219,
178
+ "loss": 0.2337,
179
+ "pred_label": 482.125,
180
+ "rewards/accuracies": 0.6875,
181
+ "rewards/chosen": -2.711317300796509,
182
+ "rewards/margins": 2.521953582763672,
183
+ "rewards/rejected": -5.23327112197876,
184
  "step": 100,
185
+ "use_label": 1039.875
186
  },
187
  {
188
  "epoch": 0.12,
189
  "learning_rate": 4.918509895227008e-05,
190
+ "logits/chosen": -2.6410791873931885,
191
+ "logits/rejected": -2.587087631225586,
192
+ "logps/chosen": -328.8195495605469,
193
+ "logps/rejected": -354.773193359375,
194
+ "loss": 0.1381,
195
+ "pred_label": 612.7750244140625,
196
  "rewards/accuracies": 0.675000011920929,
197
+ "rewards/chosen": -6.414492607116699,
198
+ "rewards/margins": 4.145485877990723,
199
+ "rewards/rejected": -10.559977531433105,
200
  "step": 110,
201
+ "use_label": 1069.2249755859375
202
  },
203
  {
204
  "epoch": 0.13,
205
  "learning_rate": 4.860302677532014e-05,
206
+ "logits/chosen": -2.485827684402466,
207
+ "logits/rejected": -2.4543118476867676,
208
+ "logps/chosen": -401.1512756347656,
209
+ "logps/rejected": -415.895263671875,
210
+ "loss": 0.1156,
211
+ "pred_label": 747.8499755859375,
212
+ "rewards/accuracies": 0.699999988079071,
213
+ "rewards/chosen": -12.163839340209961,
214
+ "rewards/margins": 4.657315254211426,
215
+ "rewards/rejected": -16.821155548095703,
216
  "step": 120,
217
+ "use_label": 1094.1500244140625
218
  },
219
  {
220
  "epoch": 0.14,
221
  "learning_rate": 4.80209545983702e-05,
222
+ "logits/chosen": -2.586129665374756,
223
+ "logits/rejected": -2.4943180084228516,
224
+ "logps/chosen": -397.7897033691406,
225
+ "logps/rejected": -390.68902587890625,
226
+ "loss": 0.181,
227
+ "pred_label": 880.6749877929688,
228
+ "rewards/accuracies": 0.625,
229
+ "rewards/chosen": -9.052229881286621,
230
+ "rewards/margins": 2.705533742904663,
231
+ "rewards/rejected": -11.757763862609863,
232
  "step": 130,
233
+ "use_label": 1121.324951171875
234
  },
235
  {
236
  "epoch": 0.15,
237
  "learning_rate": 4.743888242142026e-05,
238
+ "logits/chosen": -2.4943366050720215,
239
+ "logits/rejected": -2.467221736907959,
240
+ "logps/chosen": -360.2618103027344,
241
+ "logps/rejected": -391.42254638671875,
242
+ "loss": 0.1307,
243
+ "pred_label": 1014.9000244140625,
244
+ "rewards/accuracies": 0.6499999761581421,
245
+ "rewards/chosen": -10.215502738952637,
246
+ "rewards/margins": 5.424862384796143,
247
+ "rewards/rejected": -15.640365600585938,
248
  "step": 140,
249
+ "use_label": 1147.0999755859375
250
  },
251
  {
252
  "epoch": 0.16,
253
  "learning_rate": 4.685681024447032e-05,
254
+ "logits/chosen": -2.448756456375122,
255
+ "logits/rejected": -2.4686672687530518,
256
+ "logps/chosen": -423.26348876953125,
257
+ "logps/rejected": -462.45587158203125,
258
+ "loss": 0.1291,
259
+ "pred_label": 1152.449951171875,
260
+ "rewards/accuracies": 0.612500011920929,
261
+ "rewards/chosen": -12.837747573852539,
262
+ "rewards/margins": 5.233094692230225,
263
+ "rewards/rejected": -18.070842742919922,
264
  "step": 150,
265
+ "use_label": 1169.550048828125
266
  },
267
  {
268
  "epoch": 0.17,
269
  "learning_rate": 4.6274738067520374e-05,
270
+ "logits/chosen": -2.5502519607543945,
271
+ "logits/rejected": -2.4940075874328613,
272
+ "logps/chosen": -338.2818908691406,
273
+ "logps/rejected": -399.33062744140625,
274
+ "loss": 0.1221,
275
+ "pred_label": 1286.300048828125,
276
+ "rewards/accuracies": 0.699999988079071,
277
+ "rewards/chosen": -7.589987754821777,
278
+ "rewards/margins": 5.438824653625488,
279
+ "rewards/rejected": -13.02881145477295,
280
  "step": 160,
281
+ "use_label": 1195.699951171875
282
  },
283
  {
284
  "epoch": 0.18,
285
  "learning_rate": 4.5692665890570435e-05,
286
+ "logits/chosen": -2.3367486000061035,
287
+ "logits/rejected": -2.268026351928711,
288
+ "logps/chosen": -608.579833984375,
289
+ "logps/rejected": -566.7401123046875,
290
+ "loss": 0.0863,
291
+ "pred_label": 1421.699951171875,
292
+ "rewards/accuracies": 0.637499988079071,
293
+ "rewards/chosen": -31.210372924804688,
294
+ "rewards/margins": 0.4991304278373718,
295
+ "rewards/rejected": -31.70950698852539,
296
  "step": 170,
297
+ "use_label": 1220.300048828125
298
  },
299
  {
300
  "epoch": 0.19,
301
  "learning_rate": 4.511059371362049e-05,
302
+ "logits/chosen": -2.5058579444885254,
303
+ "logits/rejected": -2.460585117340088,
304
+ "logps/chosen": -369.10260009765625,
305
+ "logps/rejected": -435.2815856933594,
306
+ "loss": 0.1377,
307
+ "pred_label": 1565.5,
308
+ "rewards/accuracies": 0.6937500238418579,
309
+ "rewards/chosen": -11.317340850830078,
310
+ "rewards/margins": 5.142584800720215,
311
+ "rewards/rejected": -16.459924697875977,
312
  "step": 180,
313
+ "use_label": 1236.5
314
  },
315
  {
316
  "epoch": 0.2,
317
  "learning_rate": 4.452852153667055e-05,
318
+ "logits/chosen": -2.689570665359497,
319
+ "logits/rejected": -2.6224443912506104,
320
+ "logps/chosen": -352.5258483886719,
321
+ "logps/rejected": -360.037841796875,
322
+ "loss": 0.1658,
323
+ "pred_label": 1696.5,
324
+ "rewards/accuracies": 0.78125,
325
+ "rewards/chosen": -4.5269904136657715,
326
+ "rewards/margins": 5.3725738525390625,
327
+ "rewards/rejected": -9.899563789367676,
328
  "step": 190,
329
+ "use_label": 1265.5
330
  },
331
  {
332
  "epoch": 0.21,
333
  "learning_rate": 4.394644935972061e-05,
334
+ "logits/chosen": -2.663055658340454,
335
+ "logits/rejected": -2.6471009254455566,
336
+ "logps/chosen": -329.58465576171875,
337
+ "logps/rejected": -377.0941467285156,
338
+ "loss": 0.1567,
339
+ "pred_label": 1831.4000244140625,
340
+ "rewards/accuracies": 0.668749988079071,
341
+ "rewards/chosen": -6.82558536529541,
342
+ "rewards/margins": 4.059788227081299,
343
+ "rewards/rejected": -10.8853759765625,
344
  "step": 200,
345
+ "use_label": 1290.5999755859375
346
  },
347
  {
348
  "epoch": 0.22,
349
  "learning_rate": 4.336437718277067e-05,
350
+ "logits/chosen": -2.6221835613250732,
351
+ "logits/rejected": -2.6028318405151367,
352
+ "logps/chosen": -400.9311828613281,
353
+ "logps/rejected": -399.74774169921875,
354
+ "loss": 0.1038,
355
+ "pred_label": 1967.925048828125,
356
+ "rewards/accuracies": 0.6812499761581421,
357
+ "rewards/chosen": -9.968328475952148,
358
+ "rewards/margins": 4.587479591369629,
359
+ "rewards/rejected": -14.555807113647461,
360
  "step": 210,
361
+ "use_label": 1314.074951171875
362
  },
363
  {
364
  "epoch": 0.23,
365
  "learning_rate": 4.278230500582072e-05,
366
+ "logits/chosen": -2.5327601432800293,
367
+ "logits/rejected": -2.5170798301696777,
368
+ "logps/chosen": -431.26373291015625,
369
+ "logps/rejected": -491.82647705078125,
370
+ "loss": 0.0838,
371
+ "pred_label": 2115.125,
372
+ "rewards/accuracies": 0.6937500238418579,
373
+ "rewards/chosen": -15.627206802368164,
374
+ "rewards/margins": 6.489752292633057,
375
+ "rewards/rejected": -22.116960525512695,
376
  "step": 220,
377
+ "use_label": 1326.875
378
  },
379
  {
380
  "epoch": 0.24,
381
  "learning_rate": 4.220023282887078e-05,
382
+ "logits/chosen": -2.2676877975463867,
383
+ "logits/rejected": -2.275028944015503,
384
+ "logps/chosen": -759.7401123046875,
385
+ "logps/rejected": -772.6349487304688,
386
+ "loss": 0.0695,
387
+ "pred_label": 2261.175048828125,
388
+ "rewards/accuracies": 0.6187499761581421,
389
+ "rewards/chosen": -45.156700134277344,
390
+ "rewards/margins": 5.6958770751953125,
391
+ "rewards/rejected": -50.852577209472656,
392
  "step": 230,
393
+ "use_label": 1340.824951171875
394
  },
395
  {
396
  "epoch": 0.25,
397
  "learning_rate": 4.161816065192084e-05,
398
+ "logits/chosen": -2.720881700515747,
399
+ "logits/rejected": -2.667050361633301,
400
+ "logps/chosen": -346.90936279296875,
401
+ "logps/rejected": -372.1588439941406,
402
+ "loss": 0.1159,
403
+ "pred_label": 2400.875,
404
+ "rewards/accuracies": 0.706250011920929,
405
+ "rewards/chosen": -7.2331953048706055,
406
+ "rewards/margins": 7.680712699890137,
407
+ "rewards/rejected": -14.913908004760742,
408
  "step": 240,
409
+ "use_label": 1361.125
410
  },
411
  {
412
  "epoch": 0.26,
413
  "learning_rate": 4.10360884749709e-05,
414
+ "logits/chosen": -2.7150168418884277,
415
+ "logits/rejected": -2.7007734775543213,
416
+ "logps/chosen": -368.1573486328125,
417
+ "logps/rejected": -409.0894470214844,
418
+ "loss": 0.1438,
419
+ "pred_label": 2528.425048828125,
420
+ "rewards/accuracies": 0.7250000238418579,
421
+ "rewards/chosen": -7.751723289489746,
422
+ "rewards/margins": 6.2153401374816895,
423
+ "rewards/rejected": -13.967063903808594,
424
  "step": 250,
425
+ "use_label": 1393.574951171875
426
  },
427
  {
428
  "epoch": 0.27,
429
  "learning_rate": 4.045401629802096e-05,
430
+ "logits/chosen": -2.7338109016418457,
431
+ "logits/rejected": -2.7302541732788086,
432
+ "logps/chosen": -320.41326904296875,
433
+ "logps/rejected": -374.7262878417969,
434
+ "loss": 0.1538,
435
+ "pred_label": 2655.949951171875,
436
+ "rewards/accuracies": 0.65625,
437
+ "rewards/chosen": -7.180386543273926,
438
+ "rewards/margins": 5.263332843780518,
439
+ "rewards/rejected": -12.443718910217285,
440
  "step": 260,
441
+ "use_label": 1426.050048828125
442
  },
443
  {
444
  "epoch": 0.28,
445
  "learning_rate": 3.9871944121071014e-05,
446
+ "logits/chosen": -2.729078769683838,
447
+ "logits/rejected": -2.7090542316436768,
448
+ "logps/chosen": -375.9404296875,
449
+ "logps/rejected": -433.7308654785156,
450
+ "loss": 0.1174,
451
+ "pred_label": 2794.300048828125,
452
+ "rewards/accuracies": 0.78125,
453
+ "rewards/chosen": -8.406911849975586,
454
+ "rewards/margins": 7.4909772872924805,
455
+ "rewards/rejected": -15.89788818359375,
456
  "step": 270,
457
+ "use_label": 1447.699951171875
458
  },
459
  {
460
  "epoch": 0.29,
461
  "learning_rate": 3.928987194412107e-05,
462
+ "logits/chosen": -2.7120089530944824,
463
+ "logits/rejected": -2.690830707550049,
464
+ "logps/chosen": -403.83233642578125,
465
+ "logps/rejected": -437.89410400390625,
466
+ "loss": 0.0926,
467
+ "pred_label": 2935.449951171875,
468
+ "rewards/accuracies": 0.737500011920929,
469
+ "rewards/chosen": -10.986837387084961,
470
+ "rewards/margins": 6.579653263092041,
471
+ "rewards/rejected": -17.566490173339844,
472
  "step": 280,
473
+ "use_label": 1466.550048828125
474
  },
475
  {
476
  "epoch": 0.3,
477
  "learning_rate": 3.870779976717113e-05,
478
+ "logits/chosen": -2.637876033782959,
479
+ "logits/rejected": -2.5659611225128174,
480
+ "logps/chosen": -455.518310546875,
481
+ "logps/rejected": -462.74267578125,
482
+ "loss": 0.0968,
483
+ "pred_label": 3076.324951171875,
484
+ "rewards/accuracies": 0.6187499761581421,
485
+ "rewards/chosen": -16.259593963623047,
486
+ "rewards/margins": 6.993022918701172,
487
+ "rewards/rejected": -23.25261688232422,
488
  "step": 290,
489
+ "use_label": 1485.675048828125
490
  },
491
  {
492
  "epoch": 0.31,
493
  "learning_rate": 3.812572759022119e-05,
494
+ "logits/chosen": -2.5933048725128174,
495
+ "logits/rejected": -2.5245001316070557,
496
+ "logps/chosen": -432.7098083496094,
497
+ "logps/rejected": -497.24554443359375,
498
+ "loss": 0.0835,
499
+ "pred_label": 3217.25,
500
+ "rewards/accuracies": 0.6499999761581421,
501
+ "rewards/chosen": -16.903099060058594,
502
+ "rewards/margins": 8.837041854858398,
503
+ "rewards/rejected": -25.74013900756836,
504
  "step": 300,
505
+ "use_label": 1504.75
506
  },
507
  {
508
  "epoch": 0.32,
509
  "learning_rate": 3.7543655413271246e-05,
510
+ "logits/chosen": -2.4902329444885254,
511
+ "logits/rejected": -2.3905444145202637,
512
+ "logps/chosen": -440.56085205078125,
513
+ "logps/rejected": -660.7030029296875,
514
+ "loss": 0.0981,
515
+ "pred_label": 3356.77490234375,
516
+ "rewards/accuracies": 0.7562500238418579,
517
+ "rewards/chosen": -17.362918853759766,
518
+ "rewards/margins": 19.8530330657959,
519
+ "rewards/rejected": -37.21595764160156,
520
  "step": 310,
521
+ "use_label": 1525.2249755859375
522
  },
523
  {
524
  "epoch": 0.33,
525
  "learning_rate": 3.696158323632131e-05,
526
+ "logits/chosen": -2.2354841232299805,
527
+ "logits/rejected": -2.1421380043029785,
528
+ "logps/chosen": -747.2213134765625,
529
+ "logps/rejected": -914.2220458984375,
530
+ "loss": 0.0656,
531
+ "pred_label": 3500.35009765625,
532
+ "rewards/accuracies": 0.6625000238418579,
533
+ "rewards/chosen": -45.124141693115234,
534
+ "rewards/margins": 18.217256546020508,
535
+ "rewards/rejected": -63.341407775878906,
536
  "step": 320,
537
+ "use_label": 1541.6500244140625
538
  },
539
  {
540
  "epoch": 0.35,
541
  "learning_rate": 3.637951105937136e-05,
542
+ "logits/chosen": -2.2868189811706543,
543
+ "logits/rejected": -2.2072067260742188,
544
+ "logps/chosen": -698.5914306640625,
545
+ "logps/rejected": -843.9619140625,
546
+ "loss": 0.0646,
547
+ "pred_label": 3651.77490234375,
548
+ "rewards/accuracies": 0.6187499761581421,
549
+ "rewards/chosen": -42.7642936706543,
550
+ "rewards/margins": 18.20369529724121,
551
+ "rewards/rejected": -60.96799850463867,
552
  "step": 330,
553
+ "use_label": 1550.2249755859375
554
  },
555
  {
556
  "epoch": 0.36,
557
  "learning_rate": 3.579743888242142e-05,
558
+ "logits/chosen": -2.1754813194274902,
559
+ "logits/rejected": -2.1737990379333496,
560
+ "logps/chosen": -933.4059448242188,
561
+ "logps/rejected": -1077.489013671875,
562
+ "loss": 0.0609,
563
+ "pred_label": 3802.02490234375,
564
+ "rewards/accuracies": 0.581250011920929,
565
+ "rewards/chosen": -64.42259216308594,
566
+ "rewards/margins": 19.241281509399414,
567
+ "rewards/rejected": -83.66387176513672,
568
  "step": 340,
569
+ "use_label": 1559.9749755859375
570
  },
571
  {
572
  "epoch": 0.37,
573
  "learning_rate": 3.5215366705471484e-05,
574
+ "logits/chosen": -2.3112852573394775,
575
+ "logits/rejected": -2.2864885330200195,
576
+ "logps/chosen": -736.1566162109375,
577
+ "logps/rejected": -973.3111572265625,
578
+ "loss": 0.0509,
579
+ "pred_label": 3953.425048828125,
580
+ "rewards/accuracies": 0.6875,
581
+ "rewards/chosen": -43.89619064331055,
582
+ "rewards/margins": 25.668701171875,
583
+ "rewards/rejected": -69.56489562988281,
584
  "step": 350,
585
+ "use_label": 1568.574951171875
586
  },
587
  {
588
  "epoch": 0.38,
589
  "learning_rate": 3.463329452852154e-05,
590
+ "logits/chosen": -2.3861846923828125,
591
+ "logits/rejected": -2.348050117492676,
592
+ "logps/chosen": -612.4452514648438,
593
+ "logps/rejected": -722.9779663085938,
594
+ "loss": 0.0589,
595
+ "pred_label": 4104.22509765625,
596
+ "rewards/accuracies": 0.6312500238418579,
597
+ "rewards/chosen": -33.965110778808594,
598
+ "rewards/margins": 11.812593460083008,
599
+ "rewards/rejected": -45.777706146240234,
600
  "step": 360,
601
+ "use_label": 1577.7750244140625
602
  },
603
  {
604
  "epoch": 0.39,
605
  "learning_rate": 3.40512223515716e-05,
606
+ "logits/chosen": -2.5135788917541504,
607
+ "logits/rejected": -2.431811571121216,
608
+ "logps/chosen": -424.55120849609375,
609
+ "logps/rejected": -567.4149780273438,
610
+ "loss": 0.0861,
611
+ "pred_label": 4248.5498046875,
612
+ "rewards/accuracies": 0.7124999761581421,
613
+ "rewards/chosen": -14.834848403930664,
614
+ "rewards/margins": 14.3344144821167,
615
+ "rewards/rejected": -29.169261932373047,
616
  "step": 370,
617
+ "use_label": 1593.449951171875
618
  },
619
  {
620
  "epoch": 0.4,
621
  "learning_rate": 3.3469150174621654e-05,
622
+ "logits/chosen": -2.5888447761535645,
623
+ "logits/rejected": -2.5503790378570557,
624
+ "logps/chosen": -432.38995361328125,
625
+ "logps/rejected": -449.0523986816406,
626
+ "loss": 0.1037,
627
+ "pred_label": 4388.47509765625,
628
+ "rewards/accuracies": 0.699999988079071,
629
+ "rewards/chosen": -14.715968132019043,
630
+ "rewards/margins": 4.131407260894775,
631
+ "rewards/rejected": -18.847375869750977,
632
  "step": 380,
633
+ "use_label": 1613.5250244140625
634
  },
635
  {
636
  "epoch": 0.41,
637
  "learning_rate": 3.288707799767171e-05,
638
+ "logits/chosen": -2.4642863273620605,
639
+ "logits/rejected": -2.3922629356384277,
640
+ "logps/chosen": -390.8463439941406,
641
+ "logps/rejected": -461.17919921875,
642
+ "loss": 0.1182,
643
+ "pred_label": 4530.875,
644
+ "rewards/accuracies": 0.731249988079071,
645
+ "rewards/chosen": -10.805445671081543,
646
+ "rewards/margins": 10.884265899658203,
647
+ "rewards/rejected": -21.689708709716797,
648
  "step": 390,
649
+ "use_label": 1631.125
650
  },
651
  {
652
  "epoch": 0.42,
653
  "learning_rate": 3.2305005820721776e-05,
654
+ "logits/chosen": -2.708742141723633,
655
+ "logits/rejected": -2.668813943862915,
656
+ "logps/chosen": -360.8929138183594,
657
+ "logps/rejected": -357.1689758300781,
658
+ "loss": 0.1227,
659
+ "pred_label": 4662.125,
660
+ "rewards/accuracies": 0.675000011920929,
661
+ "rewards/chosen": -5.38419246673584,
662
+ "rewards/margins": 4.3987908363342285,
663
+ "rewards/rejected": -9.782983779907227,
664
  "step": 400,
665
+ "use_label": 1659.875
666
  },
667
  {
668
  "epoch": 0.43,
669
  "learning_rate": 3.172293364377183e-05,
670
+ "logits/chosen": -2.703439474105835,
671
+ "logits/rejected": -2.6409201622009277,
672
+ "logps/chosen": -355.69219970703125,
673
+ "logps/rejected": -346.83416748046875,
674
+ "loss": 0.1313,
675
+ "pred_label": 4792.97509765625,
676
+ "rewards/accuracies": 0.606249988079071,
677
+ "rewards/chosen": -5.759453773498535,
678
+ "rewards/margins": 3.0691254138946533,
679
+ "rewards/rejected": -8.828579902648926,
680
  "step": 410,
681
+ "use_label": 1689.0250244140625
682
  },
683
  {
684
  "epoch": 0.44,
685
  "learning_rate": 3.1140861466821885e-05,
686
+ "logits/chosen": -2.617567539215088,
687
+ "logits/rejected": -2.578650951385498,
688
+ "logps/chosen": -307.09552001953125,
689
+ "logps/rejected": -360.4197998046875,
690
+ "loss": 0.1338,
691
+ "pred_label": 4930.9501953125,
692
+ "rewards/accuracies": 0.706250011920929,
693
+ "rewards/chosen": -6.651512145996094,
694
+ "rewards/margins": 4.47898006439209,
695
+ "rewards/rejected": -11.130491256713867,
696
  "step": 420,
697
+ "use_label": 1711.050048828125
698
  },
699
  {
700
  "epoch": 0.45,
701
  "learning_rate": 3.055878928987195e-05,
702
+ "logits/chosen": -2.6378731727600098,
703
+ "logits/rejected": -2.5847997665405273,
704
+ "logps/chosen": -359.0061340332031,
705
+ "logps/rejected": -389.7403564453125,
706
+ "loss": 0.1136,
707
+ "pred_label": 5066.6748046875,
708
+ "rewards/accuracies": 0.668749988079071,
709
+ "rewards/chosen": -8.110578536987305,
710
+ "rewards/margins": 4.911756992340088,
711
+ "rewards/rejected": -13.02233600616455,
712
  "step": 430,
713
+ "use_label": 1735.324951171875
714
  },
715
  {
716
  "epoch": 0.46,
717
  "learning_rate": 2.9976717112922005e-05,
718
+ "logits/chosen": -2.705080509185791,
719
+ "logits/rejected": -2.6582016944885254,
720
+ "logps/chosen": -361.77703857421875,
721
+ "logps/rejected": -369.16888427734375,
722
+ "loss": 0.1282,
723
+ "pred_label": 5203.0498046875,
724
+ "rewards/accuracies": 0.65625,
725
+ "rewards/chosen": -7.850671291351318,
726
+ "rewards/margins": 3.7665488719940186,
727
+ "rewards/rejected": -11.617219924926758,
728
  "step": 440,
729
+ "use_label": 1758.949951171875
730
  },
731
  {
732
  "epoch": 0.47,
733
  "learning_rate": 2.939464493597206e-05,
734
+ "logits/chosen": -2.387315273284912,
735
+ "logits/rejected": -2.332853317260742,
736
+ "logps/chosen": -343.62481689453125,
737
+ "logps/rejected": -404.4623107910156,
738
+ "loss": 0.1146,
739
+ "pred_label": 5345.10009765625,
740
+ "rewards/accuracies": 0.637499988079071,
741
+ "rewards/chosen": -11.390606880187988,
742
+ "rewards/margins": 6.090449333190918,
743
+ "rewards/rejected": -17.481056213378906,
744
  "step": 450,
745
+ "use_label": 1776.9000244140625
746
  },
747
  {
748
  "epoch": 0.48,
749
  "learning_rate": 2.881257275902212e-05,
750
+ "logits/chosen": -2.5489556789398193,
751
+ "logits/rejected": -2.5125844478607178,
752
+ "logps/chosen": -429.1163024902344,
753
+ "logps/rejected": -433.0736389160156,
754
+ "loss": 0.1077,
755
+ "pred_label": 5487.8251953125,
756
+ "rewards/accuracies": 0.6499999761581421,
757
+ "rewards/chosen": -12.731410026550293,
758
+ "rewards/margins": 3.6760268211364746,
759
+ "rewards/rejected": -16.40743637084961,
760
  "step": 460,
761
+ "use_label": 1794.175048828125
762
  },
763
  {
764
  "epoch": 0.49,
765
  "learning_rate": 2.8230500582072178e-05,
766
+ "logits/chosen": -2.482365369796753,
767
+ "logits/rejected": -2.3831024169921875,
768
+ "logps/chosen": -389.73785400390625,
769
+ "logps/rejected": -430.03973388671875,
770
+ "loss": 0.0982,
771
+ "pred_label": 5632.7998046875,
772
+ "rewards/accuracies": 0.65625,
773
+ "rewards/chosen": -12.2637357711792,
774
+ "rewards/margins": 5.055985450744629,
775
+ "rewards/rejected": -17.319721221923828,
776
  "step": 470,
777
+ "use_label": 1809.199951171875
778
  },
779
  {
780
  "epoch": 0.5,
781
  "learning_rate": 2.7648428405122233e-05,
782
+ "logits/chosen": -2.414053201675415,
783
+ "logits/rejected": -2.3774731159210205,
784
+ "logps/chosen": -391.9467468261719,
785
+ "logps/rejected": -501.77960205078125,
786
+ "loss": 0.094,
787
+ "pred_label": 5778.2001953125,
788
+ "rewards/accuracies": 0.6625000238418579,
789
+ "rewards/chosen": -14.637247085571289,
790
+ "rewards/margins": 10.688766479492188,
791
+ "rewards/rejected": -25.32601547241211,
792
  "step": 480,
793
+ "use_label": 1823.800048828125
794
  },
795
  {
796
  "epoch": 0.51,
797
  "learning_rate": 2.7066356228172297e-05,
798
+ "logits/chosen": -2.541294574737549,
799
+ "logits/rejected": -2.4439327716827393,
800
+ "logps/chosen": -361.71771240234375,
801
+ "logps/rejected": -378.765869140625,
802
+ "loss": 0.0927,
803
+ "pred_label": 5916.3251953125,
804
+ "rewards/accuracies": 0.699999988079071,
805
+ "rewards/chosen": -9.4688720703125,
806
+ "rewards/margins": 4.668297290802002,
807
+ "rewards/rejected": -14.137168884277344,
808
  "step": 490,
809
+ "use_label": 1845.675048828125
810
  },
811
  {
812
  "epoch": 0.52,
813
  "learning_rate": 2.6484284051222352e-05,
814
+ "logits/chosen": -2.6095941066741943,
815
+ "logits/rejected": -2.4995357990264893,
816
+ "logps/chosen": -364.07427978515625,
817
+ "logps/rejected": -420.4052734375,
818
+ "loss": 0.1016,
819
+ "pred_label": 6059.4501953125,
820
+ "rewards/accuracies": 0.71875,
821
+ "rewards/chosen": -7.4617791175842285,
822
+ "rewards/margins": 8.752523422241211,
823
+ "rewards/rejected": -16.214303970336914,
824
  "step": 500,
825
+ "use_label": 1862.550048828125
826
  },
827
  {
828
  "epoch": 0.53,
829
  "learning_rate": 2.590221187427241e-05,
830
+ "logits/chosen": -2.570039749145508,
831
+ "logits/rejected": -2.480402946472168,
832
+ "logps/chosen": -391.964111328125,
833
+ "logps/rejected": -434.7195739746094,
834
+ "loss": 0.1051,
835
+ "pred_label": 6202.64990234375,
836
+ "rewards/accuracies": 0.6625000238418579,
837
+ "rewards/chosen": -11.662691116333008,
838
+ "rewards/margins": 8.459746360778809,
839
+ "rewards/rejected": -20.1224365234375,
840
  "step": 510,
841
+ "use_label": 1879.3499755859375
842
  },
843
  {
844
  "epoch": 0.54,
845
  "learning_rate": 2.532013969732247e-05,
846
+ "logits/chosen": -2.4564194679260254,
847
+ "logits/rejected": -2.381622076034546,
848
+ "logps/chosen": -406.453125,
849
+ "logps/rejected": -494.87664794921875,
850
+ "loss": 0.068,
851
+ "pred_label": 6340.1748046875,
852
+ "rewards/accuracies": 0.7250000238418579,
853
+ "rewards/chosen": -11.449905395507812,
854
+ "rewards/margins": 11.066286087036133,
855
+ "rewards/rejected": -22.516191482543945,
856
  "step": 520,
857
+ "use_label": 1901.824951171875
858
  },
859
  {
860
  "epoch": 0.55,
861
  "learning_rate": 2.4738067520372525e-05,
862
+ "logits/chosen": -2.3373048305511475,
863
+ "logits/rejected": -2.183617115020752,
864
+ "logps/chosen": -547.97314453125,
865
+ "logps/rejected": -691.542236328125,
866
+ "loss": 0.0709,
867
+ "pred_label": 6487.5498046875,
868
+ "rewards/accuracies": 0.643750011920929,
869
+ "rewards/chosen": -24.765727996826172,
870
+ "rewards/margins": 14.99103832244873,
871
+ "rewards/rejected": -39.75676727294922,
872
  "step": 530,
873
+ "use_label": 1914.449951171875
874
  },
875
  {
876
  "epoch": 0.57,
877
  "learning_rate": 2.4155995343422587e-05,
878
+ "logits/chosen": -2.211622714996338,
879
+ "logits/rejected": -2.0457851886749268,
880
+ "logps/chosen": -632.00634765625,
881
+ "logps/rejected": -867.4461059570312,
882
+ "loss": 0.05,
883
+ "pred_label": 6636.0498046875,
884
+ "rewards/accuracies": 0.7250000238418579,
885
+ "rewards/chosen": -34.34697723388672,
886
+ "rewards/margins": 25.800724029541016,
887
+ "rewards/rejected": -60.14769744873047,
888
  "step": 540,
889
+ "use_label": 1925.949951171875
890
  },
891
  {
892
  "epoch": 0.58,
893
  "learning_rate": 2.3573923166472644e-05,
894
+ "logits/chosen": -2.088925838470459,
895
+ "logits/rejected": -1.9701734781265259,
896
+ "logps/chosen": -796.8790893554688,
897
+ "logps/rejected": -983.8599853515625,
898
+ "loss": 0.045,
899
+ "pred_label": 6784.7998046875,
900
+ "rewards/accuracies": 0.675000011920929,
901
+ "rewards/chosen": -50.76759338378906,
902
+ "rewards/margins": 21.424175262451172,
903
+ "rewards/rejected": -72.19176483154297,
904
  "step": 550,
905
+ "use_label": 1937.199951171875
906
  },
907
  {
908
  "epoch": 0.59,
909
  "learning_rate": 2.2991850989522702e-05,
910
+ "logits/chosen": -2.2610983848571777,
911
+ "logits/rejected": -2.1500396728515625,
912
+ "logps/chosen": -535.4710693359375,
913
+ "logps/rejected": -635.5113525390625,
914
+ "loss": 0.067,
915
+ "pred_label": 6936.47509765625,
916
+ "rewards/accuracies": 0.6875,
917
+ "rewards/chosen": -26.10666847229004,
918
+ "rewards/margins": 15.695854187011719,
919
+ "rewards/rejected": -41.802528381347656,
920
  "step": 560,
921
+ "use_label": 1945.5250244140625
922
  },
923
  {
924
  "epoch": 0.6,
925
  "learning_rate": 2.240977881257276e-05,
926
+ "logits/chosen": -2.3317577838897705,
927
+ "logits/rejected": -2.215259313583374,
928
+ "logps/chosen": -497.422119140625,
929
+ "logps/rejected": -631.938232421875,
930
+ "loss": 0.0596,
931
+ "pred_label": 7086.02490234375,
932
+ "rewards/accuracies": 0.706250011920929,
933
+ "rewards/chosen": -22.793350219726562,
934
+ "rewards/margins": 14.197186470031738,
935
+ "rewards/rejected": -36.99053955078125,
936
  "step": 570,
937
+ "use_label": 1955.9749755859375
938
  },
939
  {
940
  "epoch": 0.61,
941
  "learning_rate": 2.1827706635622818e-05,
942
+ "logits/chosen": -2.399937152862549,
943
+ "logits/rejected": -2.2866618633270264,
944
+ "logps/chosen": -441.74090576171875,
945
+ "logps/rejected": -541.2008666992188,
946
+ "loss": 0.0784,
947
+ "pred_label": 7230.875,
948
+ "rewards/accuracies": 0.699999988079071,
949
+ "rewards/chosen": -16.363243103027344,
950
+ "rewards/margins": 11.310129165649414,
951
+ "rewards/rejected": -27.67337417602539,
952
  "step": 580,
953
+ "use_label": 1971.125
954
  },
955
  {
956
  "epoch": 0.62,
957
  "learning_rate": 2.124563445867288e-05,
958
+ "logits/chosen": -2.3154327869415283,
959
+ "logits/rejected": -2.2939510345458984,
960
+ "logps/chosen": -400.68572998046875,
961
+ "logps/rejected": -483.30279541015625,
962
+ "loss": 0.089,
963
+ "pred_label": 7372.14990234375,
964
+ "rewards/accuracies": 0.6812499761581421,
965
+ "rewards/chosen": -12.908785820007324,
966
+ "rewards/margins": 7.343487739562988,
967
+ "rewards/rejected": -20.252273559570312,
968
  "step": 590,
969
+ "use_label": 1989.8499755859375
970
  },
971
  {
972
  "epoch": 0.63,
973
  "learning_rate": 2.0663562281722934e-05,
974
+ "logits/chosen": -2.463757276535034,
975
+ "logits/rejected": -2.3977725505828857,
976
+ "logps/chosen": -393.55133056640625,
977
+ "logps/rejected": -500.068359375,
978
+ "loss": 0.1022,
979
+ "pred_label": 7510.75,
980
+ "rewards/accuracies": 0.7437499761581421,
981
+ "rewards/chosen": -8.018844604492188,
982
+ "rewards/margins": 11.711953163146973,
983
+ "rewards/rejected": -19.730796813964844,
984
  "step": 600,
985
+ "use_label": 2011.25
986
  },
987
  {
988
  "epoch": 0.64,
989
  "learning_rate": 2.0081490104772992e-05,
990
+ "logits/chosen": -2.409186840057373,
991
+ "logits/rejected": -2.3435616493225098,
992
+ "logps/chosen": -412.2386779785156,
993
+ "logps/rejected": -426.6654357910156,
994
+ "loss": 0.0856,
995
+ "pred_label": 7652.7001953125,
996
+ "rewards/accuracies": 0.699999988079071,
997
+ "rewards/chosen": -10.632938385009766,
998
+ "rewards/margins": 8.070967674255371,
999
+ "rewards/rejected": -18.703907012939453,
1000
  "step": 610,
1001
+ "use_label": 2029.300048828125
1002
  },
1003
  {
1004
  "epoch": 0.65,
1005
  "learning_rate": 1.9499417927823053e-05,
1006
+ "logits/chosen": -2.4093079566955566,
1007
+ "logits/rejected": -2.260230779647827,
1008
+ "logps/chosen": -429.3250427246094,
1009
+ "logps/rejected": -516.5551147460938,
1010
+ "loss": 0.0781,
1011
+ "pred_label": 7797.64990234375,
1012
+ "rewards/accuracies": 0.7562500238418579,
1013
+ "rewards/chosen": -14.934354782104492,
1014
+ "rewards/margins": 11.502239227294922,
1015
+ "rewards/rejected": -26.436594009399414,
1016
  "step": 620,
1017
+ "use_label": 2044.3499755859375
1018
  },
1019
  {
1020
  "epoch": 0.66,
1021
  "learning_rate": 1.8917345750873107e-05,
1022
+ "logits/chosen": -2.2741165161132812,
1023
+ "logits/rejected": -2.1207118034362793,
1024
+ "logps/chosen": -464.8804626464844,
1025
+ "logps/rejected": -637.3003540039062,
1026
+ "loss": 0.0665,
1027
+ "pred_label": 7949.27490234375,
1028
+ "rewards/accuracies": 0.7562500238418579,
1029
+ "rewards/chosen": -17.97607421875,
1030
+ "rewards/margins": 19.63665199279785,
1031
+ "rewards/rejected": -37.61272430419922,
1032
  "step": 630,
1033
+ "use_label": 2052.72509765625
1034
  },
1035
  {
1036
  "epoch": 0.67,
1037
  "learning_rate": 1.833527357392317e-05,
1038
+ "logits/chosen": -2.1046862602233887,
1039
+ "logits/rejected": -1.9926544427871704,
1040
+ "logps/chosen": -479.60479736328125,
1041
+ "logps/rejected": -621.544921875,
1042
+ "loss": 0.0622,
1043
+ "pred_label": 8098.72509765625,
1044
+ "rewards/accuracies": 0.699999988079071,
1045
+ "rewards/chosen": -22.641353607177734,
1046
+ "rewards/margins": 14.655824661254883,
1047
+ "rewards/rejected": -37.29717254638672,
1048
  "step": 640,
1049
+ "use_label": 2063.27490234375
1050
  },
1051
  {
1052
  "epoch": 0.68,
1053
  "learning_rate": 1.7753201396973227e-05,
1054
+ "logits/chosen": -2.304948329925537,
1055
+ "logits/rejected": -2.159106731414795,
1056
+ "logps/chosen": -497.8910217285156,
1057
+ "logps/rejected": -595.5516967773438,
1058
+ "loss": 0.0522,
1059
+ "pred_label": 8248.5,
1060
+ "rewards/accuracies": 0.737500011920929,
1061
+ "rewards/chosen": -19.594234466552734,
1062
+ "rewards/margins": 12.16260814666748,
1063
+ "rewards/rejected": -31.7568416595459,
1064
  "step": 650,
1065
+ "use_label": 2073.5
1066
  },
1067
  {
1068
  "epoch": 0.69,
1069
  "learning_rate": 1.717112922002328e-05,
1070
+ "logits/chosen": -2.3109021186828613,
1071
+ "logits/rejected": -2.2408225536346436,
1072
+ "logps/chosen": -490.71112060546875,
1073
+ "logps/rejected": -506.54766845703125,
1074
+ "loss": 0.087,
1075
+ "pred_label": 8398.224609375,
1076
+ "rewards/accuracies": 0.637499988079071,
1077
+ "rewards/chosen": -19.534738540649414,
1078
+ "rewards/margins": 6.132468223571777,
1079
+ "rewards/rejected": -25.667205810546875,
1080
  "step": 660,
1081
+ "use_label": 2083.77490234375
1082
  },
1083
  {
1084
  "epoch": 0.7,
1085
  "learning_rate": 1.6589057043073342e-05,
1086
+ "logits/chosen": -2.2374701499938965,
1087
+ "logits/rejected": -2.11650013923645,
1088
+ "logps/chosen": -484.6102600097656,
1089
+ "logps/rejected": -569.1256103515625,
1090
+ "loss": 0.064,
1091
+ "pred_label": 8546.0,
1092
+ "rewards/accuracies": 0.7250000238418579,
1093
+ "rewards/chosen": -20.35372543334961,
1094
+ "rewards/margins": 12.419096946716309,
1095
+ "rewards/rejected": -32.772823333740234,
1096
  "step": 670,
1097
+ "use_label": 2096.0
1098
  },
1099
  {
1100
  "epoch": 0.71,
1101
  "learning_rate": 1.60069848661234e-05,
1102
+ "logits/chosen": -2.3477349281311035,
1103
+ "logits/rejected": -2.2388532161712646,
1104
+ "logps/chosen": -479.6822814941406,
1105
+ "logps/rejected": -505.4498596191406,
1106
+ "loss": 0.0759,
1107
+ "pred_label": 8695.724609375,
1108
+ "rewards/accuracies": 0.6187499761581421,
1109
+ "rewards/chosen": -19.274688720703125,
1110
+ "rewards/margins": 5.44730281829834,
1111
+ "rewards/rejected": -24.72199058532715,
1112
  "step": 680,
1113
+ "use_label": 2106.27490234375
1114
  },
1115
  {
1116
  "epoch": 0.72,
1117
  "learning_rate": 1.5424912689173458e-05,
1118
+ "logits/chosen": -2.179953098297119,
1119
+ "logits/rejected": -2.0111422538757324,
1120
+ "logps/chosen": -476.6915588378906,
1121
+ "logps/rejected": -548.3240966796875,
1122
+ "loss": 0.0593,
1123
+ "pred_label": 8846.5,
1124
+ "rewards/accuracies": 0.7124999761581421,
1125
+ "rewards/chosen": -22.6307430267334,
1126
+ "rewards/margins": 11.273223876953125,
1127
+ "rewards/rejected": -33.903968811035156,
1128
  "step": 690,
1129
+ "use_label": 2115.5
1130
  },
1131
  {
1132
  "epoch": 0.73,
1133
  "learning_rate": 1.4842840512223516e-05,
1134
+ "logits/chosen": -2.298417329788208,
1135
+ "logits/rejected": -2.1161131858825684,
1136
+ "logps/chosen": -477.685546875,
1137
+ "logps/rejected": -721.3785400390625,
1138
+ "loss": 0.0546,
1139
+ "pred_label": 8998.650390625,
1140
+ "rewards/accuracies": 0.6937500238418579,
1141
+ "rewards/chosen": -20.19955062866211,
1142
+ "rewards/margins": 27.11043930053711,
1143
+ "rewards/rejected": -47.30998992919922,
1144
  "step": 700,
1145
+ "use_label": 2123.35009765625
1146
  },
1147
  {
1148
  "epoch": 0.74,
1149
  "learning_rate": 1.4260768335273575e-05,
1150
+ "logits/chosen": -2.19868803024292,
1151
+ "logits/rejected": -2.0781731605529785,
1152
+ "logps/chosen": -629.6842041015625,
1153
+ "logps/rejected": -780.2570190429688,
1154
+ "loss": 0.0661,
1155
+ "pred_label": 9147.0498046875,
1156
+ "rewards/accuracies": 0.6937500238418579,
1157
+ "rewards/chosen": -34.010963439941406,
1158
+ "rewards/margins": 19.887874603271484,
1159
+ "rewards/rejected": -53.898834228515625,
1160
  "step": 710,
1161
+ "use_label": 2134.949951171875
1162
  },
1163
  {
1164
  "epoch": 0.75,
1165
  "learning_rate": 1.3678696158323633e-05,
1166
+ "logits/chosen": -2.2372653484344482,
1167
+ "logits/rejected": -2.1191489696502686,
1168
+ "logps/chosen": -515.64208984375,
1169
+ "logps/rejected": -691.5008544921875,
1170
+ "loss": 0.0704,
1171
+ "pred_label": 9300.599609375,
1172
+ "rewards/accuracies": 0.7250000238418579,
1173
+ "rewards/chosen": -24.41898536682129,
1174
+ "rewards/margins": 19.390825271606445,
1175
+ "rewards/rejected": -43.809810638427734,
1176
  "step": 720,
1177
+ "use_label": 2141.39990234375
1178
  },
1179
  {
1180
  "epoch": 0.76,
1181
  "learning_rate": 1.309662398137369e-05,
1182
+ "logits/chosen": -2.2743449211120605,
1183
+ "logits/rejected": -2.1811537742614746,
1184
+ "logps/chosen": -521.6466064453125,
1185
+ "logps/rejected": -568.2872314453125,
1186
+ "loss": 0.0724,
1187
+ "pred_label": 9445.150390625,
1188
+ "rewards/accuracies": 0.675000011920929,
1189
+ "rewards/chosen": -22.4196720123291,
1190
+ "rewards/margins": 10.242195129394531,
1191
+ "rewards/rejected": -32.661869049072266,
1192
  "step": 730,
1193
+ "use_label": 2156.85009765625
1194
  },
1195
  {
1196
  "epoch": 0.77,
1197
  "learning_rate": 1.2514551804423749e-05,
1198
+ "logits/chosen": -2.2904646396636963,
1199
+ "logits/rejected": -2.2049851417541504,
1200
+ "logps/chosen": -520.117919921875,
1201
+ "logps/rejected": -695.0534057617188,
1202
+ "loss": 0.0636,
1203
+ "pred_label": 9594.025390625,
1204
+ "rewards/accuracies": 0.706250011920929,
1205
+ "rewards/chosen": -21.274608612060547,
1206
+ "rewards/margins": 19.581838607788086,
1207
+ "rewards/rejected": -40.8564453125,
1208
  "step": 740,
1209
+ "use_label": 2167.97509765625
1210
  },
1211
  {
1212
  "epoch": 0.79,
1213
  "learning_rate": 1.1932479627473807e-05,
1214
+ "logits/chosen": -2.25730562210083,
1215
+ "logits/rejected": -2.1175169944763184,
1216
+ "logps/chosen": -540.2493896484375,
1217
+ "logps/rejected": -738.2742919921875,
1218
+ "loss": 0.063,
1219
+ "pred_label": 9744.1748046875,
1220
+ "rewards/accuracies": 0.7124999761581421,
1221
+ "rewards/chosen": -27.976119995117188,
1222
+ "rewards/margins": 19.7985782623291,
1223
+ "rewards/rejected": -47.77469253540039,
1224
  "step": 750,
1225
+ "use_label": 2177.824951171875
1226
  },
1227
  {
1228
  "epoch": 0.8,
1229
  "learning_rate": 1.1350407450523866e-05,
1230
+ "logits/chosen": -2.3123764991760254,
1231
+ "logits/rejected": -2.2530713081359863,
1232
+ "logps/chosen": -507.7060546875,
1233
+ "logps/rejected": -641.7669067382812,
1234
+ "loss": 0.0539,
1235
+ "pred_label": 9894.150390625,
1236
+ "rewards/accuracies": 0.7124999761581421,
1237
+ "rewards/chosen": -21.224315643310547,
1238
+ "rewards/margins": 16.741058349609375,
1239
+ "rewards/rejected": -37.96537780761719,
1240
  "step": 760,
1241
+ "use_label": 2187.85009765625
1242
  },
1243
  {
1244
  "epoch": 0.81,
1245
  "learning_rate": 1.0768335273573923e-05,
1246
+ "logits/chosen": -2.314664363861084,
1247
+ "logits/rejected": -2.259186029434204,
1248
+ "logps/chosen": -518.42333984375,
1249
+ "logps/rejected": -678.8192749023438,
1250
+ "loss": 0.0433,
1251
+ "pred_label": 10047.875,
1252
+ "rewards/accuracies": 0.7124999761581421,
1253
+ "rewards/chosen": -21.612756729125977,
1254
+ "rewards/margins": 17.79681396484375,
1255
+ "rewards/rejected": -39.409568786621094,
1256
  "step": 770,
1257
+ "use_label": 2194.125
1258
  },
1259
  {
1260
  "epoch": 0.82,
1261
  "learning_rate": 1.0186263096623982e-05,
1262
+ "logits/chosen": -2.287078380584717,
1263
+ "logits/rejected": -2.199553966522217,
1264
+ "logps/chosen": -530.3980102539062,
1265
+ "logps/rejected": -631.2974243164062,
1266
+ "loss": 0.051,
1267
+ "pred_label": 10198.8251953125,
1268
+ "rewards/accuracies": 0.6812499761581421,
1269
+ "rewards/chosen": -24.92042350769043,
1270
+ "rewards/margins": 13.495773315429688,
1271
+ "rewards/rejected": -38.41619873046875,
1272
  "step": 780,
1273
+ "use_label": 2203.175048828125
1274
  },
1275
  {
1276
  "epoch": 0.83,
1277
  "learning_rate": 9.60419091967404e-06,
1278
+ "logits/chosen": -2.3670148849487305,
1279
+ "logits/rejected": -2.2450852394104004,
1280
+ "logps/chosen": -533.3482666015625,
1281
+ "logps/rejected": -667.7590942382812,
1282
+ "loss": 0.0574,
1283
+ "pred_label": 10346.0,
1284
+ "rewards/accuracies": 0.6499999761581421,
1285
+ "rewards/chosen": -26.55388832092285,
1286
+ "rewards/margins": 14.149024963378906,
1287
+ "rewards/rejected": -40.702919006347656,
1288
  "step": 790,
1289
+ "use_label": 2216.0
1290
  },
1291
  {
1292
  "epoch": 0.84,
1293
  "learning_rate": 9.022118742724098e-06,
1294
+ "logits/chosen": -2.2852892875671387,
1295
+ "logits/rejected": -2.191178798675537,
1296
+ "logps/chosen": -515.4843139648438,
1297
+ "logps/rejected": -658.4345703125,
1298
+ "loss": 0.0732,
1299
+ "pred_label": 10492.0,
1300
+ "rewards/accuracies": 0.75,
1301
+ "rewards/chosen": -20.101245880126953,
1302
+ "rewards/margins": 18.136005401611328,
1303
+ "rewards/rejected": -38.237247467041016,
1304
  "step": 800,
1305
+ "use_label": 2230.0
1306
  },
1307
  {
1308
  "epoch": 0.85,
1309
  "learning_rate": 8.440046565774158e-06,
1310
+ "logits/chosen": -2.308079481124878,
1311
+ "logits/rejected": -2.2213778495788574,
1312
+ "logps/chosen": -485.36602783203125,
1313
+ "logps/rejected": -505.19189453125,
1314
+ "loss": 0.0907,
1315
+ "pred_label": 10638.150390625,
1316
+ "rewards/accuracies": 0.6312500238418579,
1317
+ "rewards/chosen": -20.434207916259766,
1318
+ "rewards/margins": 7.203681945800781,
1319
+ "rewards/rejected": -27.637889862060547,
1320
  "step": 810,
1321
+ "use_label": 2243.85009765625
1322
  },
1323
  {
1324
  "epoch": 0.86,
1325
  "learning_rate": 7.857974388824214e-06,
1326
+ "logits/chosen": -2.3282344341278076,
1327
+ "logits/rejected": -2.243950366973877,
1328
+ "logps/chosen": -473.38262939453125,
1329
+ "logps/rejected": -566.6471557617188,
1330
+ "loss": 0.0883,
1331
+ "pred_label": 10780.900390625,
1332
+ "rewards/accuracies": 0.706250011920929,
1333
+ "rewards/chosen": -18.006444931030273,
1334
+ "rewards/margins": 14.502110481262207,
1335
+ "rewards/rejected": -32.5085563659668,
1336
  "step": 820,
1337
+ "use_label": 2261.10009765625
1338
  },
1339
  {
1340
  "epoch": 0.87,
1341
  "learning_rate": 7.275902211874273e-06,
1342
+ "logits/chosen": -2.345092296600342,
1343
+ "logits/rejected": -2.301511287689209,
1344
+ "logps/chosen": -431.64520263671875,
1345
+ "logps/rejected": -632.6573486328125,
1346
+ "loss": 0.0699,
1347
+ "pred_label": 10927.275390625,
1348
+ "rewards/accuracies": 0.75,
1349
+ "rewards/chosen": -16.88318634033203,
1350
+ "rewards/margins": 18.3623104095459,
1351
+ "rewards/rejected": -35.24549865722656,
1352
  "step": 830,
1353
+ "use_label": 2274.72509765625
1354
  },
1355
  {
1356
  "epoch": 0.88,
1357
  "learning_rate": 6.693830034924331e-06,
1358
+ "logits/chosen": -2.3140549659729004,
1359
+ "logits/rejected": -2.3112993240356445,
1360
+ "logps/chosen": -505.6224670410156,
1361
+ "logps/rejected": -544.6993408203125,
1362
+ "loss": 0.074,
1363
+ "pred_label": 11074.5498046875,
1364
+ "rewards/accuracies": 0.6499999761581421,
1365
+ "rewards/chosen": -23.76053237915039,
1366
+ "rewards/margins": 4.261009216308594,
1367
+ "rewards/rejected": -28.02153968811035,
1368
  "step": 840,
1369
+ "use_label": 2287.449951171875
1370
  },
1371
  {
1372
  "epoch": 0.89,
1373
  "learning_rate": 6.111757857974389e-06,
1374
+ "logits/chosen": -2.3638854026794434,
1375
+ "logits/rejected": -2.3075807094573975,
1376
+ "logps/chosen": -458.67181396484375,
1377
+ "logps/rejected": -516.2307739257812,
1378
+ "loss": 0.0849,
1379
+ "pred_label": 11220.900390625,
1380
+ "rewards/accuracies": 0.7562500238418579,
1381
+ "rewards/chosen": -16.364498138427734,
1382
+ "rewards/margins": 10.922462463378906,
1383
+ "rewards/rejected": -27.28696060180664,
1384
  "step": 850,
1385
+ "use_label": 2301.10009765625
1386
  },
1387
  {
1388
  "epoch": 0.9,
1389
  "learning_rate": 5.529685681024447e-06,
1390
+ "logits/chosen": -2.3312666416168213,
1391
+ "logits/rejected": -2.2601587772369385,
1392
+ "logps/chosen": -464.119140625,
1393
+ "logps/rejected": -580.7103881835938,
1394
+ "loss": 0.0727,
1395
+ "pred_label": 11369.724609375,
1396
+ "rewards/accuracies": 0.706250011920929,
1397
+ "rewards/chosen": -17.923574447631836,
1398
+ "rewards/margins": 12.89836597442627,
1399
+ "rewards/rejected": -30.821941375732422,
1400
  "step": 860,
1401
+ "use_label": 2312.27490234375
1402
  },
1403
  {
1404
  "epoch": 0.91,
1405
  "learning_rate": 4.947613504074506e-06,
1406
+ "logits/chosen": -2.3686623573303223,
1407
+ "logits/rejected": -2.2975738048553467,
1408
+ "logps/chosen": -374.78192138671875,
1409
+ "logps/rejected": -505.87152099609375,
1410
+ "loss": 0.0739,
1411
+ "pred_label": 11518.5751953125,
1412
+ "rewards/accuracies": 0.731249988079071,
1413
+ "rewards/chosen": -13.179441452026367,
1414
+ "rewards/margins": 13.382822036743164,
1415
+ "rewards/rejected": -26.562265396118164,
1416
  "step": 870,
1417
+ "use_label": 2323.425048828125
1418
  },
1419
  {
1420
  "epoch": 0.92,
1421
  "learning_rate": 4.3655413271245635e-06,
1422
+ "logits/chosen": -2.3770673274993896,
1423
+ "logits/rejected": -2.2960658073425293,
1424
+ "logps/chosen": -470.25439453125,
1425
+ "logps/rejected": -548.5438842773438,
1426
+ "loss": 0.0668,
1427
+ "pred_label": 11666.25,
1428
+ "rewards/accuracies": 0.7437499761581421,
1429
+ "rewards/chosen": -18.366519927978516,
1430
+ "rewards/margins": 10.322815895080566,
1431
+ "rewards/rejected": -28.689334869384766,
1432
  "step": 880,
1433
+ "use_label": 2335.75
1434
  },
1435
  {
1436
  "epoch": 0.93,
1437
  "learning_rate": 3.7834691501746217e-06,
1438
+ "logits/chosen": -2.3971574306488037,
1439
+ "logits/rejected": -2.3389389514923096,
1440
+ "logps/chosen": -445.2398986816406,
1441
+ "logps/rejected": -555.1053466796875,
1442
+ "loss": 0.0667,
1443
+ "pred_label": 11815.775390625,
1444
+ "rewards/accuracies": 0.706250011920929,
1445
+ "rewards/chosen": -16.39553451538086,
1446
+ "rewards/margins": 11.912240982055664,
1447
+ "rewards/rejected": -28.30777931213379,
1448
  "step": 890,
1449
+ "use_label": 2346.22509765625
1450
  },
1451
  {
1452
  "epoch": 0.94,
1453
  "learning_rate": 3.2013969732246805e-06,
1454
+ "logits/chosen": -2.279033660888672,
1455
+ "logits/rejected": -2.161271572113037,
1456
+ "logps/chosen": -450.6705627441406,
1457
+ "logps/rejected": -566.3276977539062,
1458
+ "loss": 0.0876,
1459
+ "pred_label": 11961.5751953125,
1460
+ "rewards/accuracies": 0.668749988079071,
1461
+ "rewards/chosen": -17.153247833251953,
1462
+ "rewards/margins": 13.627708435058594,
1463
+ "rewards/rejected": -30.780956268310547,
1464
  "step": 900,
1465
+ "use_label": 2360.425048828125
1466
  },
1467
  {
1468
  "epoch": 0.95,
1469
  "learning_rate": 2.6193247962747383e-06,
1470
+ "logits/chosen": -2.414407253265381,
1471
+ "logits/rejected": -2.305093288421631,
1472
+ "logps/chosen": -450.60595703125,
1473
+ "logps/rejected": -530.6917724609375,
1474
+ "loss": 0.0717,
1475
+ "pred_label": 12107.375,
1476
+ "rewards/accuracies": 0.731249988079071,
1477
+ "rewards/chosen": -15.418998718261719,
1478
+ "rewards/margins": 11.336746215820312,
1479
+ "rewards/rejected": -26.7557430267334,
1480
  "step": 910,
1481
+ "use_label": 2374.625
1482
  },
1483
  {
1484
  "epoch": 0.96,
1485
  "learning_rate": 2.037252619324796e-06,
1486
+ "logits/chosen": -2.346126079559326,
1487
+ "logits/rejected": -2.2459468841552734,
1488
+ "logps/chosen": -454.90740966796875,
1489
+ "logps/rejected": -556.7593994140625,
1490
+ "loss": 0.0626,
1491
+ "pred_label": 12256.9501953125,
1492
+ "rewards/accuracies": 0.699999988079071,
1493
+ "rewards/chosen": -15.801968574523926,
1494
+ "rewards/margins": 13.915499687194824,
1495
+ "rewards/rejected": -29.717464447021484,
1496
  "step": 920,
1497
+ "use_label": 2385.050048828125
1498
  },
1499
  {
1500
  "epoch": 0.97,
1501
  "learning_rate": 1.4551804423748545e-06,
1502
+ "logits/chosen": -2.400489091873169,
1503
+ "logits/rejected": -2.255152702331543,
1504
+ "logps/chosen": -392.2051086425781,
1505
+ "logps/rejected": -571.8855590820312,
1506
+ "loss": 0.0663,
1507
+ "pred_label": 12408.974609375,
1508
+ "rewards/accuracies": 0.8125,
1509
+ "rewards/chosen": -12.399200439453125,
1510
+ "rewards/margins": 20.609838485717773,
1511
+ "rewards/rejected": -33.00904083251953,
1512
  "step": 930,
1513
+ "use_label": 2393.02490234375
1514
  },
1515
  {
1516
  "epoch": 0.98,
1517
  "learning_rate": 8.731082654249127e-07,
1518
+ "logits/chosen": -2.398634195327759,
1519
+ "logits/rejected": -2.28908634185791,
1520
+ "logps/chosen": -459.11846923828125,
1521
+ "logps/rejected": -550.9254150390625,
1522
+ "loss": 0.0731,
1523
+ "pred_label": 12559.150390625,
1524
+ "rewards/accuracies": 0.71875,
1525
+ "rewards/chosen": -17.1726016998291,
1526
+ "rewards/margins": 11.231449127197266,
1527
+ "rewards/rejected": -28.404048919677734,
1528
  "step": 940,
1529
+ "use_label": 2402.85009765625
1530
  },
1531
  {
1532
  "epoch": 0.99,
1533
  "learning_rate": 2.910360884749709e-07,
1534
+ "logits/chosen": -2.373722791671753,
1535
+ "logits/rejected": -2.2480380535125732,
1536
+ "logps/chosen": -450.88623046875,
1537
+ "logps/rejected": -560.01220703125,
1538
+ "loss": 0.069,
1539
+ "pred_label": 12709.775390625,
1540
+ "rewards/accuracies": 0.6875,
1541
+ "rewards/chosen": -18.243160247802734,
1542
+ "rewards/margins": 12.46325397491455,
1543
+ "rewards/rejected": -30.7064151763916,
1544
  "step": 950,
1545
+ "use_label": 2412.22509765625
1546
  },
1547
  {
1548
  "epoch": 1.0,
1549
+ "eval_logits/chosen": -2.3870656490325928,
1550
+ "eval_logits/rejected": -2.274824380874634,
1551
+ "eval_logps/chosen": -441.0377502441406,
1552
+ "eval_logps/rejected": -538.9608764648438,
1553
+ "eval_loss": 0.07583338022232056,
1554
+ "eval_pred_label": 13084.7841796875,
1555
+ "eval_rewards/accuracies": 0.7080000042915344,
1556
+ "eval_rewards/chosen": -15.678034782409668,
1557
+ "eval_rewards/margins": 12.288079261779785,
1558
+ "eval_rewards/rejected": -27.966114044189453,
1559
+ "eval_runtime": 450.8813,
1560
+ "eval_samples_per_second": 4.436,
1561
+ "eval_steps_per_second": 0.277,
1562
+ "eval_use_label": 2447.216064453125,
1563
  "step": 955
1564
  },
1565
  {
1566
  "epoch": 1.0,
1567
  "step": 955,
1568
  "total_flos": 0.0,
1569
+ "train_loss": 0.12920980815488006,
1570
+ "train_runtime": 25162.7962,
1571
+ "train_samples_per_second": 2.43,
1572
  "train_steps_per_second": 0.038
1573
  }
1574
  ],