wzhouad commited on
Commit
e1301e1
1 Parent(s): 598ee87

Model save

Browse files
README.md CHANGED
@@ -35,7 +35,7 @@ The following hyperparameters were used during training:
35
  - learning_rate: 1e-06
36
  - train_batch_size: 2
37
  - eval_batch_size: 8
38
- - seed: 5
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
  - gradient_accumulation_steps: 8
 
35
  - learning_rate: 1e-06
36
  - train_batch_size: 2
37
  - eval_batch_size: 8
38
+ - seed: 1
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
  - gradient_accumulation_steps: 8
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "train_loss": 0.18376689068362362,
4
- "train_runtime": 14184.393,
5
- "train_samples": 62479,
6
- "train_samples_per_second": 8.81,
7
- "train_steps_per_second": 0.069
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "train_loss": 0.39703809490243847,
4
+ "train_runtime": 12665.7954,
5
+ "train_samples": 61134,
6
+ "train_samples_per_second": 9.653,
7
+ "train_steps_per_second": 0.075
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:556bc3ac94af42652ee93cbdb52da221bb36e9ff2c05b8205b5ce202e28c7a90
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aae0d51dec8f1db0c4185db4064e9e7cfa0f57226a7bdd1336e77d73fe07784
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b6305f04cb9ea11038dfb295a72f85ba3704689365f27e1d9f0df70e14593c9
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74342a40e020ecc68e1877af3bfbc8f6458eacd08a2befc496f6b74d4cc12e8
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a58dfc2efab9ea145c90c18bd178870035a4e9547fdeb65fbae277676849808f
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfcd78d697ab08df9806100c6023e029071e435e0071465482e099bcb80a107e
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35f2060af6bd81823593b6eba1e061e3f8acfb518b20cb49a1b2a1c447cb31b6
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:152de9df35d2cf2f89f72f30f0fcc0c733b787726e60d1a6e2659edcad51784a
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "train_loss": 0.18376689068362362,
4
- "train_runtime": 14184.393,
5
- "train_samples": 62479,
6
- "train_samples_per_second": 8.81,
7
- "train_steps_per_second": 0.069
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "train_loss": 0.39703809490243847,
4
+ "train_runtime": 12665.7954,
5
+ "train_samples": 61134,
6
+ "train_samples_per_second": 9.653,
7
+ "train_steps_per_second": 0.075
8
  }
trainer_state.json CHANGED
@@ -1,1383 +1,1355 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9994878361075545,
5
  "eval_steps": 10000,
6
- "global_step": 976,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
- "learning_rate": 1.0204081632653061e-07,
14
- "logits/chosen": 0.042739879339933395,
15
- "logits/rejected": 0.12301032245159149,
16
- "logps/chosen": -319.2601623535156,
17
- "logps/rejected": -236.07504272460938,
18
- "loss": 0.4266,
19
- "rewards/accuracies": 0.48750001192092896,
20
- "rewards/chosen": 0.0013425254728645086,
21
- "rewards/margins": 0.0014396004844456911,
22
- "rewards/rejected": -9.707514982437715e-05,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.04,
27
- "learning_rate": 2.0408163265306121e-07,
28
- "logits/chosen": 0.022067690268158913,
29
- "logits/rejected": 0.13554301857948303,
30
- "logps/chosen": -308.0990295410156,
31
- "logps/rejected": -199.53515625,
32
- "loss": 0.4239,
33
- "rewards/accuracies": 0.5,
34
- "rewards/chosen": -0.00030937520205043256,
35
- "rewards/margins": 0.00024947416386567056,
36
- "rewards/rejected": -0.000558849424123764,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.06,
41
- "learning_rate": 3.0612244897959183e-07,
42
- "logits/chosen": -0.01963012106716633,
43
- "logits/rejected": 0.08336172997951508,
44
- "logps/chosen": -341.10833740234375,
45
- "logps/rejected": -214.412353515625,
46
- "loss": 0.4128,
47
- "rewards/accuracies": 0.5874999761581421,
48
- "rewards/chosen": 0.0027676259633153677,
49
- "rewards/margins": 0.0053460500203073025,
50
- "rewards/rejected": -0.0025784247554838657,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.08,
55
- "learning_rate": 4.0816326530612243e-07,
56
- "logits/chosen": 0.05237439274787903,
57
- "logits/rejected": 0.11109952628612518,
58
- "logps/chosen": -338.07293701171875,
59
- "logps/rejected": -273.3377990722656,
60
- "loss": 0.4254,
61
- "rewards/accuracies": 0.550000011920929,
62
- "rewards/chosen": 0.0017787739634513855,
63
- "rewards/margins": 0.006533265113830566,
64
- "rewards/rejected": -0.004754491616040468,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.1,
69
- "learning_rate": 5.10204081632653e-07,
70
- "logits/chosen": 0.0345633402466774,
71
- "logits/rejected": 0.10821535438299179,
72
- "logps/chosen": -344.10369873046875,
73
- "logps/rejected": -230.9674530029297,
74
- "loss": 0.4343,
75
- "rewards/accuracies": 0.5375000238418579,
76
- "rewards/chosen": 0.0031904876232147217,
77
- "rewards/margins": 0.043094128370285034,
78
- "rewards/rejected": -0.039903637021780014,
79
  "step": 50
80
  },
81
  {
82
- "epoch": 0.12,
83
- "learning_rate": 6.122448979591837e-07,
84
- "logits/chosen": 0.07501409947872162,
85
- "logits/rejected": 0.13366985321044922,
86
- "logps/chosen": -318.9683837890625,
87
- "logps/rejected": -229.18203735351562,
88
- "loss": 0.4447,
89
- "rewards/accuracies": 0.6000000238418579,
90
- "rewards/chosen": -0.0012126276269555092,
91
- "rewards/margins": 0.07185648381710052,
92
- "rewards/rejected": -0.07306911051273346,
93
  "step": 60
94
  },
95
  {
96
- "epoch": 0.14,
97
- "learning_rate": 7.142857142857143e-07,
98
- "logits/chosen": 0.021035227924585342,
99
- "logits/rejected": 0.09495668858289719,
100
- "logps/chosen": -314.5700378417969,
101
- "logps/rejected": -253.2916259765625,
102
- "loss": 0.4576,
103
- "rewards/accuracies": 0.53125,
104
- "rewards/chosen": -0.042389269918203354,
105
- "rewards/margins": 0.06374682486057281,
106
- "rewards/rejected": -0.10613608360290527,
107
  "step": 70
108
  },
109
  {
110
- "epoch": 0.16,
111
- "learning_rate": 8.163265306122449e-07,
112
- "logits/chosen": 0.030707141384482384,
113
- "logits/rejected": 0.11692730337381363,
114
- "logps/chosen": -274.8445129394531,
115
- "logps/rejected": -206.48583984375,
116
- "loss": 0.4562,
117
- "rewards/accuracies": 0.5562499761581421,
118
- "rewards/chosen": -0.0059325797483325005,
119
- "rewards/margins": 0.09064096957445145,
120
- "rewards/rejected": -0.09657355397939682,
121
  "step": 80
122
  },
123
  {
124
- "epoch": 0.18,
125
- "learning_rate": 9.183673469387755e-07,
126
- "logits/chosen": 0.09560343623161316,
127
- "logits/rejected": 0.19490866363048553,
128
- "logps/chosen": -324.2842712402344,
129
- "logps/rejected": -250.9847412109375,
130
- "loss": 0.4561,
131
- "rewards/accuracies": 0.550000011920929,
132
- "rewards/chosen": -0.012979650869965553,
133
- "rewards/margins": 0.055127233266830444,
134
- "rewards/rejected": -0.06810688972473145,
135
  "step": 90
136
  },
137
  {
138
- "epoch": 0.2,
139
- "learning_rate": 9.999871970850593e-07,
140
- "logits/chosen": -0.030500883236527443,
141
- "logits/rejected": 0.05846525356173515,
142
- "logps/chosen": -325.1695251464844,
143
- "logps/rejected": -203.91525268554688,
144
- "loss": 0.4326,
145
  "rewards/accuracies": 0.643750011920929,
146
- "rewards/chosen": 0.05515174940228462,
147
- "rewards/margins": 0.17507116496562958,
148
- "rewards/rejected": -0.11991941928863525,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.23,
153
- "learning_rate": 9.995391639025223e-07,
154
- "logits/chosen": -0.016545545309782028,
155
- "logits/rejected": 0.09239337593317032,
156
- "logps/chosen": -390.1547546386719,
157
- "logps/rejected": -282.8218078613281,
158
- "loss": 0.431,
159
- "rewards/accuracies": 0.625,
160
- "rewards/chosen": 0.06177205964922905,
161
- "rewards/margins": 0.18079259991645813,
162
- "rewards/rejected": -0.11902053654193878,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.25,
167
- "learning_rate": 9.984516404805643e-07,
168
- "logits/chosen": -0.014985096640884876,
169
- "logits/rejected": 0.06358243525028229,
170
- "logps/chosen": -306.3666076660156,
171
- "logps/rejected": -214.81204223632812,
172
- "loss": 0.4418,
173
- "rewards/accuracies": 0.5874999761581421,
174
- "rewards/chosen": -0.015409037470817566,
175
- "rewards/margins": 0.1483658403158188,
176
- "rewards/rejected": -0.16377487778663635,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.27,
181
- "learning_rate": 9.967260190235684e-07,
182
- "logits/chosen": 0.038866154849529266,
183
- "logits/rejected": 0.13459371030330658,
184
- "logps/chosen": -325.4383544921875,
185
- "logps/rejected": -224.15823364257812,
186
- "loss": 0.4238,
187
- "rewards/accuracies": 0.643750011920929,
188
- "rewards/chosen": 0.04134940356016159,
189
- "rewards/margins": 0.20171746611595154,
190
- "rewards/rejected": -0.16036805510520935,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.29,
195
- "learning_rate": 9.943645086037324e-07,
196
- "logits/chosen": 0.06345884501934052,
197
- "logits/rejected": 0.13952302932739258,
198
- "logps/chosen": -277.39532470703125,
199
- "logps/rejected": -232.65121459960938,
200
- "loss": 0.4172,
201
- "rewards/accuracies": 0.612500011920929,
202
- "rewards/chosen": -0.020446954295039177,
203
- "rewards/margins": 0.11555153131484985,
204
- "rewards/rejected": -0.13599848747253418,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.31,
209
- "learning_rate": 9.913701323331023e-07,
210
- "logits/chosen": 0.003445350332185626,
211
- "logits/rejected": 0.08681725710630417,
212
- "logps/chosen": -317.0658264160156,
213
- "logps/rejected": -231.794921875,
214
- "loss": 0.4114,
215
- "rewards/accuracies": 0.6187499761581421,
216
- "rewards/chosen": -0.03761180490255356,
217
- "rewards/margins": 0.14787191152572632,
218
- "rewards/rejected": -0.18548373878002167,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.33,
223
- "learning_rate": 9.877467234935035e-07,
224
- "logits/chosen": 0.0008376747136935592,
225
- "logits/rejected": 0.09333285689353943,
226
- "logps/chosen": -335.82763671875,
227
- "logps/rejected": -246.57424926757812,
228
- "loss": 0.3868,
229
- "rewards/accuracies": 0.675000011920929,
230
- "rewards/chosen": -0.029308026656508446,
231
- "rewards/margins": 0.19365237653255463,
232
- "rewards/rejected": -0.22296042740345,
233
  "step": 160
234
  },
235
  {
236
- "epoch": 0.35,
237
- "learning_rate": 9.834989206293263e-07,
238
- "logits/chosen": -0.06711210310459137,
239
- "logits/rejected": -0.0017955079674720764,
240
- "logps/chosen": -349.3035583496094,
241
- "logps/rejected": -244.96127319335938,
242
- "loss": 0.3681,
243
- "rewards/accuracies": 0.668749988079071,
244
- "rewards/chosen": -0.028221551328897476,
245
- "rewards/margins": 0.18148021399974823,
246
- "rewards/rejected": -0.2097017467021942,
247
  "step": 170
248
  },
249
  {
250
- "epoch": 0.37,
251
- "learning_rate": 9.786321616094444e-07,
252
- "logits/chosen": -0.07065094262361526,
253
- "logits/rejected": 0.026075905188918114,
254
- "logps/chosen": -363.73248291015625,
255
- "logps/rejected": -258.8304748535156,
256
- "loss": 0.3586,
257
- "rewards/accuracies": 0.65625,
258
- "rewards/chosen": -0.06635953485965729,
259
- "rewards/margins": 0.23400752246379852,
260
- "rewards/rejected": -0.3003670573234558,
261
  "step": 180
262
  },
263
  {
264
- "epoch": 0.39,
265
- "learning_rate": 9.731526766658712e-07,
266
- "logits/chosen": -0.04519695043563843,
267
- "logits/rejected": 0.038596220314502716,
268
- "logps/chosen": -337.63934326171875,
269
- "logps/rejected": -238.10513305664062,
270
- "loss": 0.357,
271
- "rewards/accuracies": 0.6000000238418579,
272
- "rewards/chosen": -0.11399877071380615,
273
- "rewards/margins": 0.17908646166324615,
274
- "rewards/rejected": -0.2930852472782135,
275
  "step": 190
276
  },
277
  {
278
- "epoch": 0.41,
279
- "learning_rate": 9.670674804180633e-07,
280
- "logits/chosen": -0.012042679823935032,
281
- "logits/rejected": 0.05385655164718628,
282
- "logps/chosen": -322.5794677734375,
283
- "logps/rejected": -235.0473175048828,
284
- "loss": 0.3455,
285
- "rewards/accuracies": 0.731249988079071,
286
- "rewards/chosen": -0.078927181661129,
287
- "rewards/margins": 0.27467840909957886,
288
- "rewards/rejected": -0.35360556840896606,
289
  "step": 200
290
  },
291
  {
292
- "epoch": 0.43,
293
- "learning_rate": 9.603843628930827e-07,
294
- "logits/chosen": -0.07379743456840515,
295
- "logits/rejected": 0.06694406270980835,
296
- "logps/chosen": -364.69140625,
297
- "logps/rejected": -298.93377685546875,
298
- "loss": 0.3541,
299
- "rewards/accuracies": 0.59375,
300
- "rewards/chosen": -0.19018980860710144,
301
- "rewards/margins": 0.26650452613830566,
302
- "rewards/rejected": -0.4566943645477295,
303
  "step": 210
304
  },
305
  {
306
- "epoch": 0.45,
307
- "learning_rate": 9.531118795531135e-07,
308
- "logits/chosen": -0.10667786747217178,
309
- "logits/rejected": -0.021370600908994675,
310
- "logps/chosen": -357.7416076660156,
311
- "logps/rejected": -271.3829650878906,
312
- "loss": 0.36,
313
- "rewards/accuracies": 0.6812499761581421,
314
- "rewards/chosen": -0.16868090629577637,
315
- "rewards/margins": 0.34508660435676575,
316
- "rewards/rejected": -0.5137674808502197,
317
  "step": 220
318
  },
319
  {
320
- "epoch": 0.47,
321
- "learning_rate": 9.452593403430978e-07,
322
- "logits/chosen": -0.0755738765001297,
323
- "logits/rejected": 0.022949038073420525,
324
- "logps/chosen": -342.61431884765625,
325
- "logps/rejected": -272.8531188964844,
326
- "loss": 0.3224,
327
- "rewards/accuracies": 0.65625,
328
- "rewards/chosen": -0.2765820324420929,
329
- "rewards/margins": 0.2683432996273041,
330
- "rewards/rejected": -0.544925332069397,
331
  "step": 230
332
  },
333
  {
334
- "epoch": 0.49,
335
- "learning_rate": 9.368367977725124e-07,
336
- "logits/chosen": -0.017795735970139503,
337
- "logits/rejected": 0.05103006213903427,
338
- "logps/chosen": -312.7308349609375,
339
- "logps/rejected": -252.4365692138672,
340
- "loss": 0.298,
341
  "rewards/accuracies": 0.6812499761581421,
342
- "rewards/chosen": -0.3175797462463379,
343
- "rewards/margins": 0.25021839141845703,
344
- "rewards/rejected": -0.5677981376647949,
345
  "step": 240
346
  },
347
  {
348
- "epoch": 0.51,
349
- "learning_rate": 9.278550340465468e-07,
350
- "logits/chosen": -0.03922083601355553,
351
- "logits/rejected": 0.019568433985114098,
352
- "logps/chosen": -403.1310729980469,
353
- "logps/rejected": -325.88922119140625,
354
- "loss": 0.298,
355
- "rewards/accuracies": 0.6000000238418579,
356
- "rewards/chosen": -0.43214789032936096,
357
- "rewards/margins": 0.2559090554714203,
358
- "rewards/rejected": -0.6880569458007812,
359
  "step": 250
360
  },
361
  {
362
- "epoch": 0.53,
363
- "learning_rate": 9.183255472631486e-07,
364
- "logits/chosen": 0.014770155772566795,
365
- "logits/rejected": 0.116851806640625,
366
- "logps/chosen": -369.81854248046875,
367
- "logps/rejected": -313.6183166503906,
368
- "loss": 0.2743,
369
- "rewards/accuracies": 0.6499999761581421,
370
- "rewards/chosen": -0.5007920265197754,
371
- "rewards/margins": 0.26597368717193604,
372
- "rewards/rejected": -0.7667657136917114,
373
  "step": 260
374
  },
375
  {
376
- "epoch": 0.55,
377
- "learning_rate": 9.082605366936167e-07,
378
- "logits/chosen": -0.05128796771168709,
379
- "logits/rejected": 0.07006199657917023,
380
- "logps/chosen": -372.8706970214844,
381
- "logps/rejected": -289.9878234863281,
382
- "loss": 0.2897,
383
  "rewards/accuracies": 0.706250011920929,
384
- "rewards/chosen": -0.5824490785598755,
385
- "rewards/margins": 0.2600293755531311,
386
- "rewards/rejected": -0.8424784541130066,
387
  "step": 270
388
  },
389
  {
390
- "epoch": 0.57,
391
- "learning_rate": 8.976728871655761e-07,
392
- "logits/chosen": -0.08541660755872726,
393
- "logits/rejected": -0.03784799575805664,
394
- "logps/chosen": -334.4608154296875,
395
- "logps/rejected": -338.2781677246094,
396
- "loss": 0.2881,
397
- "rewards/accuracies": 0.6499999761581421,
398
- "rewards/chosen": -0.5974677801132202,
399
- "rewards/margins": 0.2813887596130371,
400
- "rewards/rejected": -0.8788564801216125,
401
  "step": 280
402
  },
403
  {
404
- "epoch": 0.59,
405
- "learning_rate": 8.865761525683329e-07,
406
- "logits/chosen": 0.026905322447419167,
407
- "logits/rejected": 0.09259426593780518,
408
- "logps/chosen": -409.39971923828125,
409
- "logps/rejected": -328.29595947265625,
410
- "loss": 0.2746,
411
- "rewards/accuracies": 0.6812499761581421,
412
- "rewards/chosen": -0.6293879747390747,
413
- "rewards/margins": 0.4001205563545227,
414
- "rewards/rejected": -1.029508352279663,
415
  "step": 290
416
  },
417
  {
418
- "epoch": 0.61,
419
- "learning_rate": 8.749845385017221e-07,
420
- "logits/chosen": -0.014166781678795815,
421
- "logits/rejected": 0.07386626303195953,
422
- "logps/chosen": -396.84552001953125,
423
- "logps/rejected": -301.03887939453125,
424
- "loss": 0.2593,
425
- "rewards/accuracies": 0.675000011920929,
426
- "rewards/chosen": -0.6409090757369995,
427
- "rewards/margins": 0.3970302641391754,
428
- "rewards/rejected": -1.0379393100738525,
429
  "step": 300
430
  },
431
  {
432
- "epoch": 0.64,
433
- "learning_rate": 8.629128840906621e-07,
434
- "logits/chosen": 0.030825147405266762,
435
- "logits/rejected": 0.09442819654941559,
436
- "logps/chosen": -342.51470947265625,
437
- "logps/rejected": -295.8294677734375,
438
- "loss": 0.2628,
439
- "rewards/accuracies": 0.5625,
440
- "rewards/chosen": -0.7313823103904724,
441
- "rewards/margins": 0.28571024537086487,
442
- "rewards/rejected": -1.0170925855636597,
443
  "step": 310
444
  },
445
  {
446
- "epoch": 0.66,
447
- "learning_rate": 8.50376642988695e-07,
448
- "logits/chosen": -0.07539083808660507,
449
- "logits/rejected": 0.007513365242630243,
450
- "logps/chosen": -441.4029235839844,
451
- "logps/rejected": -334.5185241699219,
452
- "loss": 0.2442,
453
- "rewards/accuracies": 0.6625000238418579,
454
- "rewards/chosen": -0.6911054849624634,
455
- "rewards/margins": 0.3970246911048889,
456
- "rewards/rejected": -1.088129997253418,
457
  "step": 320
458
  },
459
  {
460
- "epoch": 0.68,
461
- "learning_rate": 8.373918635948309e-07,
462
- "logits/chosen": 0.006354253739118576,
463
- "logits/rejected": 0.08419916778802872,
464
- "logps/chosen": -403.6936950683594,
465
- "logps/rejected": -321.6915588378906,
466
- "loss": 0.2302,
467
- "rewards/accuracies": 0.699999988079071,
468
- "rewards/chosen": -0.8267976641654968,
469
- "rewards/margins": 0.3896764814853668,
470
- "rewards/rejected": -1.216473937034607,
471
  "step": 330
472
  },
473
  {
474
- "epoch": 0.7,
475
- "learning_rate": 8.239751685090253e-07,
476
- "logits/chosen": -0.025699462741613388,
477
- "logits/rejected": 0.01821485161781311,
478
- "logps/chosen": -375.48321533203125,
479
- "logps/rejected": -353.6884460449219,
480
- "loss": 0.2414,
481
- "rewards/accuracies": 0.6312500238418579,
482
- "rewards/chosen": -0.8354716300964355,
483
- "rewards/margins": 0.3944741189479828,
484
- "rewards/rejected": -1.2299458980560303,
485
  "step": 340
486
  },
487
  {
488
- "epoch": 0.72,
489
- "learning_rate": 8.101437332525837e-07,
490
- "logits/chosen": -0.0900038480758667,
491
- "logits/rejected": -0.017262550070881844,
492
- "logps/chosen": -378.4132080078125,
493
- "logps/rejected": -320.1848449707031,
494
- "loss": 0.2473,
495
- "rewards/accuracies": 0.6312500238418579,
496
- "rewards/chosen": -0.5553010702133179,
497
- "rewards/margins": 0.3320138454437256,
498
- "rewards/rejected": -0.8873149752616882,
499
  "step": 350
500
  },
501
  {
502
- "epoch": 0.74,
503
- "learning_rate": 7.95915264280741e-07,
504
- "logits/chosen": -0.01971466653048992,
505
- "logits/rejected": 0.013573974370956421,
506
- "logps/chosen": -364.09027099609375,
507
- "logps/rejected": -350.51678466796875,
508
- "loss": 0.2514,
509
- "rewards/accuracies": 0.606249988079071,
510
- "rewards/chosen": -0.6879822611808777,
511
- "rewards/margins": 0.3725909888744354,
512
- "rewards/rejected": -1.0605733394622803,
513
  "step": 360
514
  },
515
  {
516
- "epoch": 0.76,
517
- "learning_rate": 7.813079763155586e-07,
518
- "logits/chosen": -0.12065862119197845,
519
- "logits/rejected": -0.06556924432516098,
520
- "logps/chosen": -399.515625,
521
- "logps/rejected": -334.27947998046875,
522
- "loss": 0.2314,
523
- "rewards/accuracies": 0.625,
524
- "rewards/chosen": -0.8629879951477051,
525
- "rewards/margins": 0.33520084619522095,
526
- "rewards/rejected": -1.1981887817382812,
527
  "step": 370
528
  },
529
  {
530
- "epoch": 0.78,
531
- "learning_rate": 7.663405690281601e-07,
532
- "logits/chosen": -0.12202272564172745,
533
- "logits/rejected": -0.06837549060583115,
534
- "logps/chosen": -442.2171936035156,
535
- "logps/rejected": -403.5753173828125,
536
- "loss": 0.2184,
537
- "rewards/accuracies": 0.637499988079071,
538
- "rewards/chosen": -1.0298798084259033,
539
- "rewards/margins": 0.3311373293399811,
540
- "rewards/rejected": -1.361017107963562,
541
  "step": 380
542
  },
543
  {
544
- "epoch": 0.8,
545
- "learning_rate": 7.510322031001522e-07,
546
- "logits/chosen": -0.11861888319253922,
547
- "logits/rejected": -0.08702222257852554,
548
- "logps/chosen": -420.78131103515625,
549
- "logps/rejected": -360.6201477050781,
550
- "loss": 0.2244,
551
- "rewards/accuracies": 0.6187499761581421,
552
- "rewards/chosen": -1.0124976634979248,
553
- "rewards/margins": 0.31882134079933167,
554
- "rewards/rejected": -1.331318974494934,
555
  "step": 390
556
  },
557
  {
558
- "epoch": 0.82,
559
- "learning_rate": 7.354024756948805e-07,
560
- "logits/chosen": -0.07149451971054077,
561
- "logits/rejected": -0.06954298913478851,
562
- "logps/chosen": -411.802001953125,
563
- "logps/rejected": -384.7701721191406,
564
- "loss": 0.2089,
565
- "rewards/accuracies": 0.668749988079071,
566
- "rewards/chosen": -1.1301209926605225,
567
- "rewards/margins": 0.42858514189720154,
568
- "rewards/rejected": -1.5587060451507568,
569
  "step": 400
570
  },
571
  {
572
- "epoch": 0.84,
573
- "learning_rate": 7.194713953699171e-07,
574
- "logits/chosen": -0.1280289590358734,
575
- "logits/rejected": -0.09473087638616562,
576
- "logps/chosen": -435.06121826171875,
577
- "logps/rejected": -366.338134765625,
578
- "loss": 0.1954,
579
- "rewards/accuracies": 0.668749988079071,
580
- "rewards/chosen": -0.7566291093826294,
581
- "rewards/margins": 0.5598937273025513,
582
- "rewards/rejected": -1.3165228366851807,
583
  "step": 410
584
  },
585
  {
586
- "epoch": 0.86,
587
- "learning_rate": 7.032593564628982e-07,
588
- "logits/chosen": -0.10041435062885284,
589
- "logits/rejected": -0.07590119540691376,
590
- "logps/chosen": -401.24176025390625,
591
- "logps/rejected": -337.0291442871094,
592
- "loss": 0.2226,
593
- "rewards/accuracies": 0.59375,
594
- "rewards/chosen": -1.0773589611053467,
595
- "rewards/margins": 0.3265494704246521,
596
- "rewards/rejected": -1.403908371925354,
597
  "step": 420
598
  },
599
  {
600
- "epoch": 0.88,
601
- "learning_rate": 6.867871129835019e-07,
602
- "logits/chosen": -0.11637461185455322,
603
- "logits/rejected": -0.03979887440800667,
604
- "logps/chosen": -412.41485595703125,
605
- "logps/rejected": -385.03607177734375,
606
- "loss": 0.2177,
607
- "rewards/accuracies": 0.6625000238418579,
608
- "rewards/chosen": -1.0018829107284546,
609
- "rewards/margins": 0.38611629605293274,
610
- "rewards/rejected": -1.3879992961883545,
611
  "step": 430
612
  },
613
  {
614
- "epoch": 0.9,
615
- "learning_rate": 6.700757520449873e-07,
616
- "logits/chosen": -0.1311188042163849,
617
- "logits/rejected": -0.1057361364364624,
618
- "logps/chosen": -369.38665771484375,
619
- "logps/rejected": -357.85626220703125,
620
- "loss": 0.2488,
621
- "rewards/accuracies": 0.668749988079071,
622
- "rewards/chosen": -0.7600846886634827,
623
- "rewards/margins": 0.39686957001686096,
624
- "rewards/rejected": -1.156954288482666,
625
  "step": 440
626
  },
627
  {
628
- "epoch": 0.92,
629
- "learning_rate": 6.531466668693071e-07,
630
- "logits/chosen": -0.15897420048713684,
631
- "logits/rejected": -0.05242576450109482,
632
- "logps/chosen": -380.21759033203125,
633
- "logps/rejected": -322.91632080078125,
634
- "loss": 0.245,
635
- "rewards/accuracies": 0.581250011920929,
636
- "rewards/chosen": -0.7816046476364136,
637
- "rewards/margins": 0.2559036612510681,
638
- "rewards/rejected": -1.0375083684921265,
639
  "step": 450
640
  },
641
  {
642
- "epoch": 0.94,
643
- "learning_rate": 6.360215294003538e-07,
644
- "logits/chosen": -0.1271231323480606,
645
- "logits/rejected": -0.0517185814678669,
646
- "logps/chosen": -425.27294921875,
647
- "logps/rejected": -356.8155822753906,
648
- "loss": 0.2339,
649
- "rewards/accuracies": 0.6875,
650
- "rewards/chosen": -0.7662349939346313,
651
- "rewards/margins": 0.45115765929222107,
652
- "rewards/rejected": -1.2173926830291748,
653
  "step": 460
654
  },
655
  {
656
- "epoch": 0.96,
657
- "learning_rate": 6.187222625603957e-07,
658
- "logits/chosen": -0.0355108268558979,
659
- "logits/rejected": 0.04621673375368118,
660
- "logps/chosen": -381.2696533203125,
661
- "logps/rejected": -301.10491943359375,
662
- "loss": 0.2399,
663
- "rewards/accuracies": 0.699999988079071,
664
- "rewards/chosen": -0.8189173936843872,
665
- "rewards/margins": 0.4319301247596741,
666
- "rewards/rejected": -1.2508474588394165,
667
  "step": 470
668
  },
669
  {
670
- "epoch": 0.98,
671
- "learning_rate": 6.012710121852204e-07,
672
- "logits/chosen": -0.17021189630031586,
673
- "logits/rejected": -0.05065950006246567,
674
- "logps/chosen": -412.6038513183594,
675
- "logps/rejected": -330.48748779296875,
676
- "loss": 0.2516,
677
- "rewards/accuracies": 0.625,
678
- "rewards/chosen": -0.9672134518623352,
679
- "rewards/margins": 0.29545700550079346,
680
- "rewards/rejected": -1.2626702785491943,
681
  "step": 480
682
  },
683
  {
684
- "epoch": 1.0,
685
- "learning_rate": 5.83690118673914e-07,
686
- "logits/chosen": -0.09540718048810959,
687
- "logits/rejected": -0.017056141048669815,
688
- "logps/chosen": -419.115234375,
689
- "logps/rejected": -312.2616882324219,
690
- "loss": 0.2245,
691
- "rewards/accuracies": 0.6312500238418579,
692
- "rewards/chosen": -0.8349027633666992,
693
- "rewards/margins": 0.36838024854660034,
694
- "rewards/rejected": -1.2032830715179443,
695
  "step": 490
696
  },
697
  {
698
- "epoch": 1.02,
699
- "learning_rate": 5.660020883895668e-07,
700
- "logits/chosen": -0.016647344455122948,
701
- "logits/rejected": 0.017840882763266563,
702
- "logps/chosen": -399.71563720703125,
703
- "logps/rejected": -388.9092712402344,
704
- "loss": 0.1404,
705
  "rewards/accuracies": 0.762499988079071,
706
- "rewards/chosen": -0.9009947776794434,
707
- "rewards/margins": 0.7734060287475586,
708
- "rewards/rejected": -1.6744006872177124,
709
  "step": 500
710
  },
711
  {
712
- "epoch": 1.04,
713
- "learning_rate": 5.482295648475203e-07,
714
- "logits/chosen": -0.16914696991443634,
715
- "logits/rejected": -0.11097099632024765,
716
- "logps/chosen": -493.1507873535156,
717
- "logps/rejected": -437.85467529296875,
718
- "loss": 0.0864,
719
- "rewards/accuracies": 0.71875,
720
- "rewards/chosen": -1.517225742340088,
721
- "rewards/margins": 0.6982406377792358,
722
- "rewards/rejected": -2.215466260910034,
723
  "step": 510
724
  },
725
  {
726
- "epoch": 1.07,
727
- "learning_rate": 5.303952997280354e-07,
728
- "logits/chosen": -0.21788661181926727,
729
- "logits/rejected": -0.11950629949569702,
730
- "logps/chosen": -522.3275146484375,
731
- "logps/rejected": -467.064453125,
732
- "loss": 0.0653,
733
- "rewards/accuracies": 0.71875,
734
- "rewards/chosen": -2.158579111099243,
735
- "rewards/margins": 0.6498184204101562,
736
- "rewards/rejected": -2.8083975315093994,
737
  "step": 520
738
  },
739
  {
740
- "epoch": 1.09,
741
- "learning_rate": 5.12522123750494e-07,
742
- "logits/chosen": -0.019628793001174927,
743
- "logits/rejected": 0.05569322034716606,
744
- "logps/chosen": -419.7838439941406,
745
- "logps/rejected": -402.3912353515625,
746
- "loss": 0.0756,
747
- "rewards/accuracies": 0.7437499761581421,
748
- "rewards/chosen": -1.4739632606506348,
749
- "rewards/margins": 0.6645950078964233,
750
- "rewards/rejected": -2.1385583877563477,
751
  "step": 530
752
  },
753
  {
754
- "epoch": 1.11,
755
- "learning_rate": 4.946329174464158e-07,
756
- "logits/chosen": -0.04415629059076309,
757
- "logits/rejected": -0.04439578205347061,
758
- "logps/chosen": -526.9125366210938,
759
- "logps/rejected": -545.0748901367188,
760
- "loss": 0.0642,
761
- "rewards/accuracies": 0.768750011920929,
762
- "rewards/chosen": -2.040071487426758,
763
- "rewards/margins": 0.9352254867553711,
764
- "rewards/rejected": -2.975297212600708,
765
  "step": 540
766
  },
767
  {
768
- "epoch": 1.13,
769
- "learning_rate": 4.767505818687094e-07,
770
- "logits/chosen": -0.11534661054611206,
771
- "logits/rejected": -0.023562278598546982,
772
- "logps/chosen": -563.6917114257812,
773
- "logps/rejected": -548.306640625,
774
- "loss": 0.055,
775
- "rewards/accuracies": 0.6875,
776
- "rewards/chosen": -2.295139789581299,
777
- "rewards/margins": 0.6967246532440186,
778
- "rewards/rejected": -2.9918646812438965,
779
  "step": 550
780
  },
781
  {
782
- "epoch": 1.15,
783
- "learning_rate": 4.588980092746518e-07,
784
- "logits/chosen": -0.12250219285488129,
785
- "logits/rejected": -0.08656288683414459,
786
- "logps/chosen": -559.2648315429688,
787
- "logps/rejected": -574.6068725585938,
788
- "loss": 0.0542,
789
  "rewards/accuracies": 0.793749988079071,
790
- "rewards/chosen": -2.3971827030181885,
791
- "rewards/margins": 1.0699354410171509,
792
- "rewards/rejected": -3.46711802482605,
793
  "step": 560
794
  },
795
  {
796
- "epoch": 1.17,
797
- "learning_rate": 4.410980538201281e-07,
798
- "logits/chosen": -0.12177082151174545,
799
- "logits/rejected": -0.011621433310210705,
800
- "logps/chosen": -588.0823974609375,
801
- "logps/rejected": -604.1260986328125,
802
- "loss": 0.0462,
803
- "rewards/accuracies": 0.7562500238418579,
804
- "rewards/chosen": -2.376652479171753,
805
- "rewards/margins": 1.0704455375671387,
806
- "rewards/rejected": -3.4470982551574707,
807
  "step": 570
808
  },
809
  {
810
- "epoch": 1.19,
811
- "learning_rate": 4.233735023026463e-07,
812
- "logits/chosen": -0.04475090652704239,
813
- "logits/rejected": 0.028914233669638634,
814
- "logps/chosen": -530.6985473632812,
815
- "logps/rejected": -551.475830078125,
816
- "loss": 0.0484,
817
- "rewards/accuracies": 0.737500011920929,
818
- "rewards/chosen": -2.4700865745544434,
819
- "rewards/margins": 0.8258007168769836,
820
- "rewards/rejected": -3.295886993408203,
821
  "step": 580
822
  },
823
  {
824
- "epoch": 1.21,
825
- "learning_rate": 4.05747044990583e-07,
826
- "logits/chosen": 0.040291767567396164,
827
- "logits/rejected": 0.12547969818115234,
828
- "logps/chosen": -570.7107543945312,
829
- "logps/rejected": -546.1327514648438,
830
- "loss": 0.0512,
831
- "rewards/accuracies": 0.8187500238418579,
832
- "rewards/chosen": -2.200223207473755,
833
- "rewards/margins": 1.1106585264205933,
834
- "rewards/rejected": -3.3108818531036377,
835
  "step": 590
836
  },
837
  {
838
- "epoch": 1.23,
839
- "learning_rate": 3.882412465760009e-07,
840
- "logits/chosen": -0.07208960503339767,
841
- "logits/rejected": 0.08110973984003067,
842
- "logps/chosen": -629.0553588867188,
843
- "logps/rejected": -583.1513671875,
844
- "loss": 0.0521,
845
- "rewards/accuracies": 0.6937500238418579,
846
- "rewards/chosen": -2.517319440841675,
847
- "rewards/margins": 0.9306985139846802,
848
- "rewards/rejected": -3.4480183124542236,
849
  "step": 600
850
  },
851
  {
852
- "epoch": 1.25,
853
- "learning_rate": 3.70878517288224e-07,
854
- "logits/chosen": 0.15000824630260468,
855
- "logits/rejected": 0.2208786904811859,
856
- "logps/chosen": -548.018798828125,
857
- "logps/rejected": -527.3434448242188,
858
- "loss": 0.0473,
859
- "rewards/accuracies": 0.7250000238418579,
860
- "rewards/chosen": -2.3518996238708496,
861
- "rewards/margins": 0.7622156143188477,
862
- "rewards/rejected": -3.1141154766082764,
863
  "step": 610
864
  },
865
  {
866
- "epoch": 1.27,
867
- "learning_rate": 3.536810842051503e-07,
868
- "logits/chosen": 0.1049819216132164,
869
- "logits/rejected": 0.15174145996570587,
870
- "logps/chosen": -583.921630859375,
871
- "logps/rejected": -534.0410766601562,
872
- "loss": 0.051,
873
- "rewards/accuracies": 0.731249988079071,
874
- "rewards/chosen": -2.2783203125,
875
- "rewards/margins": 1.0053035020828247,
876
- "rewards/rejected": -3.2836239337921143,
877
  "step": 620
878
  },
879
  {
880
- "epoch": 1.29,
881
- "learning_rate": 3.366709627990279e-07,
882
- "logits/chosen": 0.1325691193342209,
883
- "logits/rejected": 0.21458351612091064,
884
- "logps/chosen": -542.7774658203125,
885
- "logps/rejected": -566.5474853515625,
886
- "loss": 0.0474,
887
- "rewards/accuracies": 0.762499988079071,
888
- "rewards/chosen": -2.380779504776001,
889
- "rewards/margins": 1.0634496212005615,
890
- "rewards/rejected": -3.4442291259765625,
891
  "step": 630
892
  },
893
  {
894
- "epoch": 1.31,
895
- "learning_rate": 3.198699287531197e-07,
896
- "logits/chosen": 0.22405290603637695,
897
- "logits/rejected": 0.38790056109428406,
898
- "logps/chosen": -511.52813720703125,
899
- "logps/rejected": -502.40594482421875,
900
- "loss": 0.0407,
901
- "rewards/accuracies": 0.706250011920929,
902
- "rewards/chosen": -2.1854372024536133,
903
- "rewards/margins": 0.8840430974960327,
904
- "rewards/rejected": -3.0694804191589355,
905
  "step": 640
906
  },
907
  {
908
- "epoch": 1.33,
909
- "learning_rate": 3.0329949008533717e-07,
910
- "logits/chosen": 0.298289954662323,
911
- "logits/rejected": 0.3011283874511719,
912
- "logps/chosen": -536.1112060546875,
913
- "logps/rejected": -537.1763916015625,
914
- "loss": 0.0429,
915
- "rewards/accuracies": 0.800000011920929,
916
- "rewards/chosen": -2.2640860080718994,
917
- "rewards/margins": 1.017397165298462,
918
- "rewards/rejected": -3.2814831733703613,
919
  "step": 650
920
  },
921
  {
922
- "epoch": 1.35,
923
- "learning_rate": 2.869808596145272e-07,
924
- "logits/chosen": 0.06549053639173508,
925
- "logits/rejected": 0.1074862852692604,
926
- "logps/chosen": -594.4285278320312,
927
- "logps/rejected": -558.3255615234375,
928
- "loss": 0.0429,
929
- "rewards/accuracies": 0.8500000238418579,
930
- "rewards/chosen": -2.2776365280151367,
931
- "rewards/margins": 1.2041267156600952,
932
- "rewards/rejected": -3.4817633628845215,
933
  "step": 660
934
  },
935
  {
936
- "epoch": 1.37,
937
- "learning_rate": 2.7093492780466355e-07,
938
- "logits/chosen": 0.18175141513347626,
939
- "logits/rejected": 0.26680392026901245,
940
- "logps/chosen": -579.2008666992188,
941
- "logps/rejected": -590.2774658203125,
942
- "loss": 0.0467,
943
  "rewards/accuracies": 0.768750011920929,
944
- "rewards/chosen": -2.1934049129486084,
945
- "rewards/margins": 1.090174913406372,
946
- "rewards/rejected": -3.2835800647735596,
947
  "step": 670
948
  },
949
  {
950
- "epoch": 1.39,
951
- "learning_rate": 2.551822360217013e-07,
952
- "logits/chosen": 0.15081962943077087,
953
- "logits/rejected": 0.20529961585998535,
954
- "logps/chosen": -553.8475341796875,
955
- "logps/rejected": -606.9322509765625,
956
- "loss": 0.0436,
957
- "rewards/accuracies": 0.7875000238418579,
958
- "rewards/chosen": -2.3112692832946777,
959
- "rewards/margins": 1.0072476863861084,
960
- "rewards/rejected": -3.318516492843628,
961
  "step": 680
962
  },
963
  {
964
- "epoch": 1.41,
965
- "learning_rate": 2.397429502373358e-07,
966
- "logits/chosen": 0.23068766295909882,
967
- "logits/rejected": 0.34180352091789246,
968
- "logps/chosen": -623.3653564453125,
969
- "logps/rejected": -575.7601318359375,
970
- "loss": 0.0418,
971
- "rewards/accuracies": 0.71875,
972
- "rewards/chosen": -2.5824313163757324,
973
- "rewards/margins": 0.9573659896850586,
974
- "rewards/rejected": -3.539797306060791,
975
  "step": 690
976
  },
977
  {
978
- "epoch": 1.43,
979
- "learning_rate": 2.2463683521332372e-07,
980
- "logits/chosen": 0.1991245001554489,
981
- "logits/rejected": 0.2743477523326874,
982
- "logps/chosen": -590.1685180664062,
983
- "logps/rejected": -624.405517578125,
984
- "loss": 0.0402,
985
- "rewards/accuracies": 0.7562500238418579,
986
- "rewards/chosen": -2.624359607696533,
987
- "rewards/margins": 1.0163304805755615,
988
- "rewards/rejected": -3.640690326690674,
989
  "step": 700
990
  },
991
  {
992
- "epoch": 1.45,
993
- "learning_rate": 2.098832291994188e-07,
994
- "logits/chosen": 0.13439445197582245,
995
- "logits/rejected": 0.3505890965461731,
996
- "logps/chosen": -532.9124755859375,
997
- "logps/rejected": -533.11328125,
998
- "loss": 0.0425,
999
- "rewards/accuracies": 0.7124999761581421,
1000
- "rewards/chosen": -2.428898811340332,
1001
- "rewards/margins": 0.8328410983085632,
1002
- "rewards/rejected": -3.261739730834961,
1003
  "step": 710
1004
  },
1005
  {
1006
- "epoch": 1.48,
1007
- "learning_rate": 1.9550101917731164e-07,
1008
- "logits/chosen": 0.2612428069114685,
1009
- "logits/rejected": 0.4181596338748932,
1010
- "logps/chosen": -548.5943603515625,
1011
- "logps/rejected": -520.1036376953125,
1012
- "loss": 0.0426,
1013
- "rewards/accuracies": 0.6875,
1014
- "rewards/chosen": -2.282651424407959,
1015
- "rewards/margins": 0.897473931312561,
1016
- "rewards/rejected": -3.1801254749298096,
1017
  "step": 720
1018
  },
1019
  {
1020
- "epoch": 1.5,
1021
- "learning_rate": 1.8150861668226304e-07,
1022
- "logits/chosen": 0.16462299227714539,
1023
- "logits/rejected": 0.255173921585083,
1024
- "logps/chosen": -613.513916015625,
1025
- "logps/rejected": -615.7657470703125,
1026
- "loss": 0.0412,
1027
- "rewards/accuracies": 0.7875000238418579,
1028
- "rewards/chosen": -2.5054032802581787,
1029
- "rewards/margins": 1.1505638360977173,
1030
- "rewards/rejected": -3.6559672355651855,
1031
  "step": 730
1032
  },
1033
  {
1034
- "epoch": 1.52,
1035
- "learning_rate": 1.6792393423338668e-07,
1036
- "logits/chosen": 0.3190282881259918,
1037
- "logits/rejected": 0.3565825819969177,
1038
- "logps/chosen": -511.28857421875,
1039
- "logps/rejected": -533.2688598632812,
1040
- "loss": 0.0373,
1041
- "rewards/accuracies": 0.7437499761581421,
1042
- "rewards/chosen": -2.4052300453186035,
1043
- "rewards/margins": 1.0499953031539917,
1044
- "rewards/rejected": -3.4552254676818848,
1045
  "step": 740
1046
  },
1047
  {
1048
- "epoch": 1.54,
1049
- "learning_rate": 1.5476436240275343e-07,
1050
- "logits/chosen": 0.21081598103046417,
1051
- "logits/rejected": 0.26332369446754456,
1052
- "logps/chosen": -571.9532470703125,
1053
- "logps/rejected": -675.26171875,
1054
- "loss": 0.0365,
1055
- "rewards/accuracies": 0.84375,
1056
- "rewards/chosen": -2.571671485900879,
1057
- "rewards/margins": 1.5944334268569946,
1058
- "rewards/rejected": -4.166104793548584,
1059
  "step": 750
1060
  },
1061
  {
1062
- "epoch": 1.56,
1063
- "learning_rate": 1.4204674755266789e-07,
1064
- "logits/chosen": 0.2154209166765213,
1065
- "logits/rejected": 0.3505161702632904,
1066
- "logps/chosen": -569.5103759765625,
1067
- "logps/rejected": -614.9661865234375,
1068
- "loss": 0.041,
1069
- "rewards/accuracies": 0.800000011920929,
1070
- "rewards/chosen": -2.5760412216186523,
1071
- "rewards/margins": 1.1412320137023926,
1072
- "rewards/rejected": -3.717272996902466,
1073
  "step": 760
1074
  },
1075
  {
1076
- "epoch": 1.58,
1077
- "learning_rate": 1.2978737026962455e-07,
1078
- "logits/chosen": 0.37023162841796875,
1079
- "logits/rejected": 0.5293506979942322,
1080
- "logps/chosen": -518.3146362304688,
1081
- "logps/rejected": -529.2200317382812,
1082
- "loss": 0.0364,
1083
- "rewards/accuracies": 0.768750011920929,
1084
- "rewards/chosen": -2.416443109512329,
1085
- "rewards/margins": 0.8256433606147766,
1086
- "rewards/rejected": -3.242086410522461,
1087
  "step": 770
1088
  },
1089
  {
1090
- "epoch": 1.6,
1091
- "learning_rate": 1.1800192452254626e-07,
1092
- "logits/chosen": 0.27624231576919556,
1093
- "logits/rejected": 0.46870145201683044,
1094
- "logps/chosen": -591.7103271484375,
1095
- "logps/rejected": -581.9118041992188,
1096
- "loss": 0.0362,
1097
- "rewards/accuracies": 0.793749988079071,
1098
- "rewards/chosen": -2.4794836044311523,
1099
- "rewards/margins": 1.137441635131836,
1100
- "rewards/rejected": -3.6169254779815674,
1101
  "step": 780
1102
  },
1103
  {
1104
- "epoch": 1.62,
1105
- "learning_rate": 1.0670549757198632e-07,
1106
- "logits/chosen": 0.3313957750797272,
1107
- "logits/rejected": 0.3926454484462738,
1108
- "logps/chosen": -572.6036376953125,
1109
- "logps/rejected": -623.8525390625,
1110
- "loss": 0.0347,
1111
- "rewards/accuracies": 0.8187500238418579,
1112
- "rewards/chosen": -2.5637197494506836,
1113
- "rewards/margins": 1.4623756408691406,
1114
- "rewards/rejected": -4.026095390319824,
1115
  "step": 790
1116
  },
1117
  {
1118
- "epoch": 1.64,
1119
- "learning_rate": 9.591255065601611e-08,
1120
- "logits/chosen": 0.36058443784713745,
1121
- "logits/rejected": 0.4308810234069824,
1122
- "logps/chosen": -660.1174926757812,
1123
- "logps/rejected": -651.1835327148438,
1124
- "loss": 0.0311,
1125
- "rewards/accuracies": 0.7437499761581421,
1126
- "rewards/chosen": -3.1758742332458496,
1127
- "rewards/margins": 0.9529415369033813,
1128
- "rewards/rejected": -4.128815650939941,
1129
  "step": 800
1130
  },
1131
- {
1132
- "epoch": 1.66,
1133
- "learning_rate": 8.563690047752148e-08,
1134
- "logits/chosen": 0.20595593750476837,
1135
- "logits/rejected": 0.3465935289859772,
1136
- "logps/chosen": -603.9895629882812,
1137
- "logps/rejected": -659.3458862304688,
1138
- "loss": 0.0281,
1139
- "rewards/accuracies": 0.800000011920929,
1140
- "rewards/chosen": -3.0521774291992188,
1141
- "rewards/margins": 1.057278037071228,
1142
- "rewards/rejected": -4.109455585479736,
1143
- "step": 810
1144
- },
1145
- {
1146
- "epoch": 1.68,
1147
- "learning_rate": 7.589170151660656e-08,
1148
- "logits/chosen": 0.2923261523246765,
1149
- "logits/rejected": 0.49081581830978394,
1150
- "logps/chosen": -580.5206909179688,
1151
- "logps/rejected": -572.1239013671875,
1152
- "loss": 0.03,
1153
- "rewards/accuracies": 0.71875,
1154
- "rewards/chosen": -2.961920976638794,
1155
- "rewards/margins": 0.8716068267822266,
1156
- "rewards/rejected": -3.8335273265838623,
1157
- "step": 820
1158
- },
1159
  {
1160
  "epoch": 1.7,
1161
- "learning_rate": 6.668942919074993e-08,
1162
- "logits/chosen": 0.292153537273407,
1163
- "logits/rejected": 0.432597815990448,
1164
- "logps/chosen": -573.1793823242188,
1165
- "logps/rejected": -590.8929443359375,
1166
- "loss": 0.0308,
1167
- "rewards/accuracies": 0.7437499761581421,
1168
- "rewards/chosen": -2.854015588760376,
1169
- "rewards/margins": 1.1199266910552979,
1170
- "rewards/rejected": -3.973942518234253,
1171
- "step": 830
1172
  },
1173
  {
1174
  "epoch": 1.72,
1175
- "learning_rate": 5.804186388427051e-08,
1176
- "logits/chosen": 0.24940094351768494,
1177
- "logits/rejected": 0.41758427023887634,
1178
- "logps/chosen": -652.5003662109375,
1179
- "logps/rejected": -634.2739868164062,
1180
- "loss": 0.0314,
1181
- "rewards/accuracies": 0.6625000238418579,
1182
- "rewards/chosen": -3.0366127490997314,
1183
- "rewards/margins": 0.8191956281661987,
1184
- "rewards/rejected": -3.8558082580566406,
1185
- "step": 840
1186
  },
1187
  {
1188
  "epoch": 1.74,
1189
- "learning_rate": 4.996007586754497e-08,
1190
- "logits/chosen": 0.22354039549827576,
1191
- "logits/rejected": 0.37779122591018677,
1192
- "logps/chosen": -606.2194213867188,
1193
- "logps/rejected": -642.6550903320312,
1194
- "loss": 0.0296,
1195
- "rewards/accuracies": 0.75,
1196
- "rewards/chosen": -2.9501092433929443,
1197
- "rewards/margins": 1.118649959564209,
1198
- "rewards/rejected": -4.068758964538574,
1199
- "step": 850
1200
  },
1201
  {
1202
  "epoch": 1.76,
1203
- "learning_rate": 4.245441112528714e-08,
1204
- "logits/chosen": 0.4196823239326477,
1205
- "logits/rejected": 0.4304388165473938,
1206
- "logps/chosen": -577.064208984375,
1207
- "logps/rejected": -602.9406127929688,
1208
- "loss": 0.0342,
1209
- "rewards/accuracies": 0.768750011920929,
1210
- "rewards/chosen": -2.622218370437622,
1211
- "rewards/margins": 1.2330958843231201,
1212
- "rewards/rejected": -3.855314254760742,
1213
- "step": 860
1214
  },
1215
  {
1216
  "epoch": 1.78,
1217
- "learning_rate": 3.5534478112028756e-08,
1218
- "logits/chosen": 0.22348478436470032,
1219
- "logits/rejected": 0.37131160497665405,
1220
- "logps/chosen": -624.7162475585938,
1221
- "logps/rejected": -609.2520751953125,
1222
- "loss": 0.0332,
1223
- "rewards/accuracies": 0.768750011920929,
1224
- "rewards/chosen": -2.5426559448242188,
1225
- "rewards/margins": 1.3100887537002563,
1226
- "rewards/rejected": -3.8527445793151855,
1227
- "step": 870
1228
  },
1229
  {
1230
  "epoch": 1.8,
1231
- "learning_rate": 2.920913545175585e-08,
1232
- "logits/chosen": 0.268731027841568,
1233
- "logits/rejected": 0.4273291528224945,
1234
- "logps/chosen": -670.083984375,
1235
- "logps/rejected": -628.88330078125,
1236
- "loss": 0.034,
1237
- "rewards/accuracies": 0.7437499761581421,
1238
- "rewards/chosen": -2.7054009437561035,
1239
- "rewards/margins": 1.0910000801086426,
1240
- "rewards/rejected": -3.796401262283325,
1241
- "step": 880
1242
  },
1243
  {
1244
  "epoch": 1.82,
1245
- "learning_rate": 2.3486480597450233e-08,
1246
- "logits/chosen": 0.43142691254615784,
1247
- "logits/rejected": 0.5881059169769287,
1248
- "logps/chosen": -598.4923706054688,
1249
- "logps/rejected": -542.4967041015625,
1250
- "loss": 0.0355,
1251
- "rewards/accuracies": 0.7124999761581421,
1252
- "rewards/chosen": -2.5859270095825195,
1253
- "rewards/margins": 0.8642128109931946,
1254
- "rewards/rejected": -3.4501395225524902,
1255
- "step": 890
1256
  },
1257
  {
1258
  "epoch": 1.84,
1259
- "learning_rate": 1.837383946505078e-08,
1260
- "logits/chosen": 0.30923277139663696,
1261
- "logits/rejected": 0.3812534511089325,
1262
- "logps/chosen": -534.3175048828125,
1263
- "logps/rejected": -591.940185546875,
1264
- "loss": 0.038,
1265
  "rewards/accuracies": 0.731249988079071,
1266
- "rewards/chosen": -2.7851271629333496,
1267
- "rewards/margins": 1.1137316226959229,
1268
- "rewards/rejected": -3.8988590240478516,
1269
- "step": 900
1270
  },
1271
  {
1272
  "epoch": 1.86,
1273
- "learning_rate": 1.3877757055106131e-08,
1274
- "logits/chosen": 0.37112337350845337,
1275
- "logits/rejected": 0.3969642221927643,
1276
- "logps/chosen": -580.228271484375,
1277
- "logps/rejected": -596.5545654296875,
1278
- "loss": 0.0348,
1279
- "rewards/accuracies": 0.7437499761581421,
1280
- "rewards/chosen": -2.8731327056884766,
1281
- "rewards/margins": 1.1270229816436768,
1282
- "rewards/rejected": -4.000155925750732,
1283
- "step": 910
1284
  },
1285
  {
1286
  "epoch": 1.88,
1287
- "learning_rate": 1.0003989074124531e-08,
1288
- "logits/chosen": 0.298672080039978,
1289
- "logits/rejected": 0.4021454453468323,
1290
- "logps/chosen": -587.4090576171875,
1291
- "logps/rejected": -554.2926025390625,
1292
- "loss": 0.0353,
1293
- "rewards/accuracies": 0.737500011920929,
1294
- "rewards/chosen": -2.7385663986206055,
1295
- "rewards/margins": 0.8734287023544312,
1296
- "rewards/rejected": -3.611995220184326,
1297
- "step": 920
1298
  },
1299
  {
1300
  "epoch": 1.91,
1301
- "learning_rate": 6.757494566346444e-09,
1302
- "logits/chosen": 0.23999682068824768,
1303
- "logits/rejected": 0.3655754029750824,
1304
- "logps/chosen": -563.5718994140625,
1305
- "logps/rejected": -580.5650634765625,
1306
- "loss": 0.0392,
1307
- "rewards/accuracies": 0.7875000238418579,
1308
- "rewards/chosen": -2.609893321990967,
1309
- "rewards/margins": 1.1768059730529785,
1310
- "rewards/rejected": -3.7866992950439453,
1311
- "step": 930
1312
  },
1313
  {
1314
  "epoch": 1.93,
1315
- "learning_rate": 4.142429565372529e-09,
1316
- "logits/chosen": 0.20008695125579834,
1317
- "logits/rejected": 0.4519767165184021,
1318
- "logps/chosen": -639.9691772460938,
1319
- "logps/rejected": -581.4786376953125,
1320
- "loss": 0.0335,
1321
- "rewards/accuracies": 0.6937500238418579,
1322
- "rewards/chosen": -2.76491117477417,
1323
- "rewards/margins": 0.7661231160163879,
1324
- "rewards/rejected": -3.531034469604492,
1325
- "step": 940
1326
  },
1327
  {
1328
  "epoch": 1.95,
1329
- "learning_rate": 2.1621417737743287e-09,
1330
- "logits/chosen": 0.288802832365036,
1331
- "logits/rejected": 0.38709038496017456,
1332
- "logps/chosen": -602.63916015625,
1333
- "logps/rejected": -614.2210693359375,
1334
- "loss": 0.0362,
1335
- "rewards/accuracies": 0.800000011920929,
1336
- "rewards/chosen": -2.629371166229248,
1337
- "rewards/margins": 1.192918062210083,
1338
- "rewards/rejected": -3.822288990020752,
1339
- "step": 950
1340
  },
1341
  {
1342
  "epoch": 1.97,
1343
- "learning_rate": 8.191662774980623e-10,
1344
- "logits/chosen": 0.3423166573047638,
1345
- "logits/rejected": 0.37124723196029663,
1346
- "logps/chosen": -607.4952392578125,
1347
- "logps/rejected": -593.841064453125,
1348
- "loss": 0.0359,
1349
- "rewards/accuracies": 0.7562500238418579,
1350
- "rewards/chosen": -2.386164903640747,
1351
- "rewards/margins": 1.163062572479248,
1352
- "rewards/rejected": -3.549227476119995,
1353
- "step": 960
1354
  },
1355
  {
1356
  "epoch": 1.99,
1357
- "learning_rate": 1.1522230054794579e-10,
1358
- "logits/chosen": 0.1892043799161911,
1359
- "logits/rejected": 0.3423364460468292,
1360
- "logps/chosen": -628.3257446289062,
1361
- "logps/rejected": -607.3165893554688,
1362
- "loss": 0.0375,
1363
- "rewards/accuracies": 0.731249988079071,
1364
- "rewards/chosen": -2.810192823410034,
1365
- "rewards/margins": 1.0157359838485718,
1366
- "rewards/rejected": -3.8259284496307373,
1367
- "step": 970
1368
  },
1369
  {
1370
  "epoch": 2.0,
1371
- "step": 976,
1372
  "total_flos": 0.0,
1373
- "train_loss": 0.18376689068362362,
1374
- "train_runtime": 14184.393,
1375
- "train_samples_per_second": 8.81,
1376
- "train_steps_per_second": 0.069
1377
  }
1378
  ],
1379
  "logging_steps": 10,
1380
- "max_steps": 976,
1381
  "num_train_epochs": 2,
1382
  "save_steps": 10000,
1383
  "total_flos": 0.0,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9973828840617638,
5
  "eval_steps": 10000,
6
+ "global_step": 954,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
+ "learning_rate": 1.0416666666666667e-07,
14
+ "logits/chosen": 0.17733711004257202,
15
+ "logits/rejected": 0.2543194591999054,
16
+ "logps/chosen": -354.2892150878906,
17
+ "logps/rejected": -305.198974609375,
18
+ "loss": 0.5,
19
+ "rewards/accuracies": 0.40625,
20
+ "rewards/chosen": 0.001110748155042529,
21
+ "rewards/margins": 0.0009211620199494064,
22
+ "rewards/rejected": 0.0001895862224046141,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.04,
27
+ "learning_rate": 2.0833333333333333e-07,
28
+ "logits/chosen": 0.0703902393579483,
29
+ "logits/rejected": 0.19805452227592468,
30
+ "logps/chosen": -316.7099609375,
31
+ "logps/rejected": -276.1195373535156,
32
+ "loss": 0.4999,
33
+ "rewards/accuracies": 0.543749988079071,
34
+ "rewards/chosen": 0.0002531521604396403,
35
+ "rewards/margins": 0.001093686674721539,
36
+ "rewards/rejected": -0.0008405345724895597,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.06,
41
+ "learning_rate": 3.1249999999999997e-07,
42
+ "logits/chosen": 0.1768152415752411,
43
+ "logits/rejected": 0.24860361218452454,
44
+ "logps/chosen": -294.9255676269531,
45
+ "logps/rejected": -298.43670654296875,
46
+ "loss": 0.4995,
47
+ "rewards/accuracies": 0.59375,
48
+ "rewards/chosen": 0.0004133238398935646,
49
+ "rewards/margins": 0.0018008403712883592,
50
+ "rewards/rejected": -0.001387516618706286,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.08,
55
+ "learning_rate": 4.1666666666666667e-07,
56
+ "logits/chosen": 0.09639827907085419,
57
+ "logits/rejected": 0.2179565727710724,
58
+ "logps/chosen": -346.4007263183594,
59
+ "logps/rejected": -320.07464599609375,
60
+ "loss": 0.4982,
61
+ "rewards/accuracies": 0.643750011920929,
62
+ "rewards/chosen": 0.011715102009475231,
63
+ "rewards/margins": 0.0077905962243676186,
64
+ "rewards/rejected": 0.003924505319446325,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.1,
69
+ "learning_rate": 5.208333333333334e-07,
70
+ "logits/chosen": 0.14820781350135803,
71
+ "logits/rejected": 0.23618468642234802,
72
+ "logps/chosen": -308.26483154296875,
73
+ "logps/rejected": -283.5643310546875,
74
+ "loss": 0.496,
75
+ "rewards/accuracies": 0.6812499761581421,
76
+ "rewards/chosen": 0.022877948358654976,
77
+ "rewards/margins": 0.021698923781514168,
78
+ "rewards/rejected": 0.0011790238786488771,
79
  "step": 50
80
  },
81
  {
82
+ "epoch": 0.13,
83
+ "learning_rate": 6.249999999999999e-07,
84
+ "logits/chosen": 0.12726549804210663,
85
+ "logits/rejected": 0.2663690447807312,
86
+ "logps/chosen": -289.76361083984375,
87
+ "logps/rejected": -275.6463623046875,
88
+ "loss": 0.4922,
89
+ "rewards/accuracies": 0.6875,
90
+ "rewards/chosen": 0.031151825562119484,
91
+ "rewards/margins": 0.033161893486976624,
92
+ "rewards/rejected": -0.0020100646652281284,
93
  "step": 60
94
  },
95
  {
96
+ "epoch": 0.15,
97
+ "learning_rate": 7.291666666666666e-07,
98
+ "logits/chosen": 0.1551138460636139,
99
+ "logits/rejected": 0.2375030517578125,
100
+ "logps/chosen": -323.76788330078125,
101
+ "logps/rejected": -318.1293029785156,
102
+ "loss": 0.4879,
103
+ "rewards/accuracies": 0.59375,
104
+ "rewards/chosen": 0.059557922184467316,
105
+ "rewards/margins": 0.05137287825345993,
106
+ "rewards/rejected": 0.008185049518942833,
107
  "step": 70
108
  },
109
  {
110
+ "epoch": 0.17,
111
+ "learning_rate": 8.333333333333333e-07,
112
+ "logits/chosen": 0.22313980758190155,
113
+ "logits/rejected": 0.31770533323287964,
114
+ "logps/chosen": -312.06854248046875,
115
+ "logps/rejected": -291.3954162597656,
116
+ "loss": 0.4823,
117
+ "rewards/accuracies": 0.7250000238418579,
118
+ "rewards/chosen": 0.06595131754875183,
119
+ "rewards/margins": 0.10923006385564804,
120
+ "rewards/rejected": -0.04327874630689621,
121
  "step": 80
122
  },
123
  {
124
+ "epoch": 0.19,
125
+ "learning_rate": 9.374999999999999e-07,
126
+ "logits/chosen": 0.111533522605896,
127
+ "logits/rejected": 0.25437992811203003,
128
+ "logps/chosen": -331.6070556640625,
129
+ "logps/rejected": -276.5615234375,
130
+ "loss": 0.4763,
131
+ "rewards/accuracies": 0.6812499761581421,
132
+ "rewards/chosen": 0.05334480479359627,
133
+ "rewards/margins": 0.14821472764015198,
134
+ "rewards/rejected": -0.09486991912126541,
135
  "step": 90
136
  },
137
  {
138
+ "epoch": 0.21,
139
+ "learning_rate": 9.999463737538052e-07,
140
+ "logits/chosen": 0.19283509254455566,
141
+ "logits/rejected": 0.36483508348464966,
142
+ "logps/chosen": -334.4452819824219,
143
+ "logps/rejected": -312.9904479980469,
144
+ "loss": 0.4681,
145
  "rewards/accuracies": 0.643750011920929,
146
+ "rewards/chosen": 0.0011617511045187712,
147
+ "rewards/margins": 0.20724515616893768,
148
+ "rewards/rejected": -0.20608338713645935,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.23,
153
+ "learning_rate": 9.993432105822034e-07,
154
+ "logits/chosen": 0.21006178855895996,
155
+ "logits/rejected": 0.2403305023908615,
156
+ "logps/chosen": -329.19293212890625,
157
+ "logps/rejected": -345.82330322265625,
158
+ "loss": 0.457,
159
+ "rewards/accuracies": 0.675000011920929,
160
+ "rewards/chosen": -0.16573527455329895,
161
+ "rewards/margins": 0.2979966104030609,
162
+ "rewards/rejected": -0.4637318551540375,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.25,
167
+ "learning_rate": 9.980706626858607e-07,
168
+ "logits/chosen": 0.15046869218349457,
169
+ "logits/rejected": 0.21053044497966766,
170
+ "logps/chosen": -360.9029846191406,
171
+ "logps/rejected": -405.44732666015625,
172
+ "loss": 0.46,
173
+ "rewards/accuracies": 0.637499988079071,
174
+ "rewards/chosen": -0.40349340438842773,
175
+ "rewards/margins": 0.43901604413986206,
176
+ "rewards/rejected": -0.8425094485282898,
177
  "step": 120
178
  },
179
  {
180
  "epoch": 0.27,
181
+ "learning_rate": 9.961304359538434e-07,
182
+ "logits/chosen": 0.13695412874221802,
183
+ "logits/rejected": 0.2686176002025604,
184
+ "logps/chosen": -424.5625915527344,
185
+ "logps/rejected": -432.45745849609375,
186
+ "loss": 0.4508,
187
+ "rewards/accuracies": 0.6312500238418579,
188
+ "rewards/chosen": -0.8606651425361633,
189
+ "rewards/margins": 0.6154058575630188,
190
+ "rewards/rejected": -1.4760708808898926,
191
  "step": 130
192
  },
193
  {
194
  "epoch": 0.29,
195
+ "learning_rate": 9.935251313189563e-07,
196
+ "logits/chosen": 0.08335243165493011,
197
+ "logits/rejected": 0.23106291890144348,
198
+ "logps/chosen": -490.5091247558594,
199
+ "logps/rejected": -535.1593017578125,
200
+ "loss": 0.4468,
201
+ "rewards/accuracies": 0.6312500238418579,
202
+ "rewards/chosen": -1.5846575498580933,
203
+ "rewards/margins": 1.0678659677505493,
204
+ "rewards/rejected": -2.6525235176086426,
205
  "step": 140
206
  },
207
  {
208
  "epoch": 0.31,
209
+ "learning_rate": 9.902582412711118e-07,
210
+ "logits/chosen": 0.16554813086986542,
211
+ "logits/rejected": 0.20584425330162048,
212
+ "logps/chosen": -427.4266052246094,
213
+ "logps/rejected": -530.4254150390625,
214
+ "loss": 0.445,
215
+ "rewards/accuracies": 0.71875,
216
+ "rewards/chosen": -0.9212606549263,
217
+ "rewards/margins": 1.2334972620010376,
218
+ "rewards/rejected": -2.1547577381134033,
219
  "step": 150
220
  },
221
  {
222
  "epoch": 0.33,
223
+ "learning_rate": 9.86334145175542e-07,
224
+ "logits/chosen": 0.08956819772720337,
225
+ "logits/rejected": 0.10614663362503052,
226
+ "logps/chosen": -355.2901916503906,
227
+ "logps/rejected": -487.94354248046875,
228
+ "loss": 0.4399,
229
+ "rewards/accuracies": 0.731249988079071,
230
+ "rewards/chosen": -0.5322867631912231,
231
+ "rewards/margins": 1.5401604175567627,
232
+ "rewards/rejected": -2.0724472999572754,
233
  "step": 160
234
  },
235
  {
236
+ "epoch": 0.36,
237
+ "learning_rate": 9.817581034021272e-07,
238
+ "logits/chosen": 0.022985249757766724,
239
+ "logits/rejected": 0.06110963970422745,
240
+ "logps/chosen": -431.2767639160156,
241
+ "logps/rejected": -524.8790893554688,
242
+ "loss": 0.4341,
243
+ "rewards/accuracies": 0.699999988079071,
244
+ "rewards/chosen": -0.9490836262702942,
245
+ "rewards/margins": 1.221215009689331,
246
+ "rewards/rejected": -2.1702985763549805,
247
  "step": 170
248
  },
249
  {
250
+ "epoch": 0.38,
251
+ "learning_rate": 9.765362502737097e-07,
252
+ "logits/chosen": -0.04698944836854935,
253
+ "logits/rejected": -0.01610407792031765,
254
+ "logps/chosen": -415.97393798828125,
255
+ "logps/rejected": -554.08837890625,
256
+ "loss": 0.4314,
257
+ "rewards/accuracies": 0.706250011920929,
258
+ "rewards/chosen": -0.8994191288948059,
259
+ "rewards/margins": 1.6427128314971924,
260
+ "rewards/rejected": -2.5421319007873535,
261
  "step": 180
262
  },
263
  {
264
+ "epoch": 0.4,
265
+ "learning_rate": 9.706755858428485e-07,
266
+ "logits/chosen": 0.12246842682361603,
267
+ "logits/rejected": 0.13095493614673615,
268
+ "logps/chosen": -418.01751708984375,
269
+ "logps/rejected": -506.07421875,
270
+ "loss": 0.4235,
271
+ "rewards/accuracies": 0.7124999761581421,
272
+ "rewards/chosen": -0.8335806131362915,
273
+ "rewards/margins": 1.0999118089675903,
274
+ "rewards/rejected": -1.9334923028945923,
275
  "step": 190
276
  },
277
  {
278
+ "epoch": 0.42,
279
+ "learning_rate": 9.641839665080363e-07,
280
+ "logits/chosen": 0.11251389980316162,
281
+ "logits/rejected": 0.08771563321352005,
282
+ "logps/chosen": -444.196533203125,
283
+ "logps/rejected": -665.639892578125,
284
+ "loss": 0.419,
285
+ "rewards/accuracies": 0.75,
286
+ "rewards/chosen": -1.0370674133300781,
287
+ "rewards/margins": 2.6780648231506348,
288
+ "rewards/rejected": -3.7151317596435547,
289
  "step": 200
290
  },
291
  {
292
+ "epoch": 0.44,
293
+ "learning_rate": 9.570700944819582e-07,
294
+ "logits/chosen": 0.16030333936214447,
295
+ "logits/rejected": 0.11628633737564087,
296
+ "logps/chosen": -400.63427734375,
297
+ "logps/rejected": -635.213134765625,
298
+ "loss": 0.4265,
299
+ "rewards/accuracies": 0.731249988079071,
300
+ "rewards/chosen": -0.8647702932357788,
301
+ "rewards/margins": 2.788623332977295,
302
+ "rewards/rejected": -3.6533939838409424,
303
  "step": 210
304
  },
305
  {
306
+ "epoch": 0.46,
307
+ "learning_rate": 9.493435061259129e-07,
308
+ "logits/chosen": -0.053914181888103485,
309
+ "logits/rejected": -0.0958121120929718,
310
+ "logps/chosen": -501.2230529785156,
311
+ "logps/rejected": -713.2017211914062,
312
+ "loss": 0.4207,
313
+ "rewards/accuracies": 0.65625,
314
+ "rewards/chosen": -1.8454395532608032,
315
+ "rewards/margins": 2.649871826171875,
316
+ "rewards/rejected": -4.495311260223389,
317
  "step": 220
318
  },
319
  {
320
+ "epoch": 0.48,
321
+ "learning_rate": 9.4101455916603e-07,
322
+ "logits/chosen": 0.024957913905382156,
323
+ "logits/rejected": -0.08018078655004501,
324
+ "logps/chosen": -582.2233276367188,
325
+ "logps/rejected": -891.3052978515625,
326
+ "loss": 0.4177,
327
+ "rewards/accuracies": 0.675000011920929,
328
+ "rewards/chosen": -2.561619758605957,
329
+ "rewards/margins": 3.5202267169952393,
330
+ "rewards/rejected": -6.081845760345459,
331
  "step": 230
332
  },
333
  {
334
+ "epoch": 0.5,
335
+ "learning_rate": 9.320944188084241e-07,
336
+ "logits/chosen": 0.06781353056430817,
337
+ "logits/rejected": -0.011409667320549488,
338
+ "logps/chosen": -590.6744384765625,
339
+ "logps/rejected": -886.1702880859375,
340
+ "loss": 0.4203,
341
  "rewards/accuracies": 0.6812499761581421,
342
+ "rewards/chosen": -2.787226915359497,
343
+ "rewards/margins": 3.05540132522583,
344
+ "rewards/rejected": -5.842628479003906,
345
  "step": 240
346
  },
347
  {
348
+ "epoch": 0.52,
349
+ "learning_rate": 9.225950427718974e-07,
350
+ "logits/chosen": 0.17237909138202667,
351
+ "logits/rejected": 0.08455310761928558,
352
+ "logps/chosen": -487.7764587402344,
353
+ "logps/rejected": -983.6060791015625,
354
+ "loss": 0.4162,
355
+ "rewards/accuracies": 0.699999988079071,
356
+ "rewards/chosen": -1.7572829723358154,
357
+ "rewards/margins": 5.258338451385498,
358
+ "rewards/rejected": -7.015621185302734,
359
  "step": 250
360
  },
361
  {
362
+ "epoch": 0.54,
363
+ "learning_rate": 9.125291652582547e-07,
364
+ "logits/chosen": 0.04219328239560127,
365
+ "logits/rejected": 0.006076293531805277,
366
+ "logps/chosen": -587.68212890625,
367
+ "logps/rejected": -869.4441528320312,
368
+ "loss": 0.4232,
369
+ "rewards/accuracies": 0.643750011920929,
370
+ "rewards/chosen": -2.330383777618408,
371
+ "rewards/margins": 3.363435745239258,
372
+ "rewards/rejected": -5.693819522857666,
373
  "step": 260
374
  },
375
  {
376
+ "epoch": 0.57,
377
+ "learning_rate": 9.019102798817195e-07,
378
+ "logits/chosen": 0.1859409064054489,
379
+ "logits/rejected": -0.06210414692759514,
380
+ "logps/chosen": -533.7982177734375,
381
+ "logps/rejected": -985.5645751953125,
382
+ "loss": 0.4114,
383
  "rewards/accuracies": 0.706250011920929,
384
+ "rewards/chosen": -2.0466434955596924,
385
+ "rewards/margins": 4.704514503479004,
386
+ "rewards/rejected": -6.751158237457275,
387
  "step": 270
388
  },
389
  {
390
+ "epoch": 0.59,
391
+ "learning_rate": 8.90752621580335e-07,
392
+ "logits/chosen": 0.19852975010871887,
393
+ "logits/rejected": 0.030256235972046852,
394
+ "logps/chosen": -513.59033203125,
395
+ "logps/rejected": -937.9052734375,
396
+ "loss": 0.4156,
397
+ "rewards/accuracies": 0.6875,
398
+ "rewards/chosen": -2.1710736751556396,
399
+ "rewards/margins": 4.297308444976807,
400
+ "rewards/rejected": -6.468382358551025,
401
  "step": 280
402
  },
403
  {
404
+ "epoch": 0.61,
405
+ "learning_rate": 8.79071147533597e-07,
406
+ "logits/chosen": 0.14935357868671417,
407
+ "logits/rejected": -0.028311902657151222,
408
+ "logps/chosen": -705.8804321289062,
409
+ "logps/rejected": -1405.2919921875,
410
+ "loss": 0.4052,
411
+ "rewards/accuracies": 0.706250011920929,
412
+ "rewards/chosen": -3.5944924354553223,
413
+ "rewards/margins": 7.230493068695068,
414
+ "rewards/rejected": -10.824986457824707,
415
  "step": 290
416
  },
417
  {
418
+ "epoch": 0.63,
419
+ "learning_rate": 8.668815171119019e-07,
420
+ "logits/chosen": 0.2377752959728241,
421
+ "logits/rejected": 0.054682862013578415,
422
+ "logps/chosen": -492.9376525878906,
423
+ "logps/rejected": -1074.2073974609375,
424
+ "loss": 0.4129,
425
+ "rewards/accuracies": 0.762499988079071,
426
+ "rewards/chosen": -2.011826515197754,
427
+ "rewards/margins": 5.496823310852051,
428
+ "rewards/rejected": -7.5086493492126465,
429
  "step": 300
430
  },
431
  {
432
+ "epoch": 0.65,
433
+ "learning_rate": 8.54200070884685e-07,
434
+ "logits/chosen": 0.2463679015636444,
435
+ "logits/rejected": -0.016352087259292603,
436
+ "logps/chosen": -525.9217529296875,
437
+ "logps/rejected": -1137.378662109375,
438
+ "loss": 0.4032,
439
+ "rewards/accuracies": 0.699999988079071,
440
+ "rewards/chosen": -2.3019001483917236,
441
+ "rewards/margins": 5.954804420471191,
442
+ "rewards/rejected": -8.25670337677002,
443
  "step": 310
444
  },
445
  {
446
+ "epoch": 0.67,
447
+ "learning_rate": 8.410438087153911e-07,
448
+ "logits/chosen": 0.22406017780303955,
449
+ "logits/rejected": 0.10108964145183563,
450
+ "logps/chosen": -472.781005859375,
451
+ "logps/rejected": -1149.229248046875,
452
+ "loss": 0.4105,
453
+ "rewards/accuracies": 0.737500011920929,
454
+ "rewards/chosen": -1.571500301361084,
455
+ "rewards/margins": 7.018779754638672,
456
+ "rewards/rejected": -8.590279579162598,
457
  "step": 320
458
  },
459
  {
460
+ "epoch": 0.69,
461
+ "learning_rate": 8.274303669726426e-07,
462
+ "logits/chosen": 0.29168057441711426,
463
+ "logits/rejected": 0.15196271240711212,
464
+ "logps/chosen": -600.2703857421875,
465
+ "logps/rejected": -1573.563232421875,
466
+ "loss": 0.402,
467
+ "rewards/accuracies": 0.7124999761581421,
468
+ "rewards/chosen": -3.015018939971924,
469
+ "rewards/margins": 9.613102912902832,
470
+ "rewards/rejected": -12.628121376037598,
471
  "step": 330
472
  },
473
  {
474
+ "epoch": 0.71,
475
+ "learning_rate": 8.133779948881513e-07,
476
+ "logits/chosen": 0.33964803814888,
477
+ "logits/rejected": 0.3003019690513611,
478
+ "logps/chosen": -494.3397521972656,
479
+ "logps/rejected": -1070.0369873046875,
480
+ "loss": 0.4047,
481
+ "rewards/accuracies": 0.800000011920929,
482
+ "rewards/chosen": -2.076432943344116,
483
+ "rewards/margins": 6.0156073570251465,
484
+ "rewards/rejected": -8.092041015625,
485
  "step": 340
486
  },
487
  {
488
+ "epoch": 0.73,
489
+ "learning_rate": 7.989055300930704e-07,
490
+ "logits/chosen": 0.2937834858894348,
491
+ "logits/rejected": 0.22495004534721375,
492
+ "logps/chosen": -525.5234985351562,
493
+ "logps/rejected": -1205.3031005859375,
494
+ "loss": 0.404,
495
+ "rewards/accuracies": 0.6499999761581421,
496
+ "rewards/chosen": -2.2402138710021973,
497
+ "rewards/margins": 6.707070350646973,
498
+ "rewards/rejected": -8.947283744812012,
499
  "step": 350
500
  },
501
  {
502
+ "epoch": 0.75,
503
+ "learning_rate": 7.840323733655778e-07,
504
+ "logits/chosen": 0.20088157057762146,
505
+ "logits/rejected": 0.07263296842575073,
506
+ "logps/chosen": -569.1207275390625,
507
+ "logps/rejected": -1455.37548828125,
508
+ "loss": 0.3978,
509
+ "rewards/accuracies": 0.75,
510
+ "rewards/chosen": -2.4096343517303467,
511
+ "rewards/margins": 9.322293281555176,
512
+ "rewards/rejected": -11.731927871704102,
513
  "step": 360
514
  },
515
  {
516
+ "epoch": 0.77,
517
+ "learning_rate": 7.687784626235447e-07,
518
+ "logits/chosen": 0.04894110560417175,
519
+ "logits/rejected": -0.0861678496003151,
520
+ "logps/chosen": -700.1285400390625,
521
+ "logps/rejected": -1508.282958984375,
522
+ "loss": 0.4002,
523
+ "rewards/accuracies": 0.699999988079071,
524
+ "rewards/chosen": -3.51697039604187,
525
+ "rewards/margins": 8.459676742553711,
526
+ "rewards/rejected": -11.97664737701416,
527
  "step": 370
528
  },
529
  {
530
+ "epoch": 0.8,
531
+ "learning_rate": 7.531642461971514e-07,
532
+ "logits/chosen": 0.13316456973552704,
533
+ "logits/rejected": -0.1390618234872818,
534
+ "logps/chosen": -834.8723754882812,
535
+ "logps/rejected": -1582.9775390625,
536
+ "loss": 0.3995,
537
+ "rewards/accuracies": 0.7124999761581421,
538
+ "rewards/chosen": -5.417990684509277,
539
+ "rewards/margins": 7.689940452575684,
540
+ "rewards/rejected": -13.107931137084961,
541
  "step": 380
542
  },
543
  {
544
+ "epoch": 0.82,
545
+ "learning_rate": 7.372106554172801e-07,
546
+ "logits/chosen": 0.05666132643818855,
547
+ "logits/rejected": -0.015092259272933006,
548
+ "logps/chosen": -540.2429809570312,
549
+ "logps/rejected": -1435.7958984375,
550
+ "loss": 0.3973,
551
+ "rewards/accuracies": 0.71875,
552
+ "rewards/chosen": -1.985515832901001,
553
+ "rewards/margins": 9.362586975097656,
554
+ "rewards/rejected": -11.348101615905762,
555
  "step": 390
556
  },
557
  {
558
+ "epoch": 0.84,
559
+ "learning_rate": 7.209390765564318e-07,
560
+ "logits/chosen": 0.16617372632026672,
561
+ "logits/rejected": -0.0461905337870121,
562
+ "logps/chosen": -723.858642578125,
563
+ "logps/rejected": -1655.4898681640625,
564
+ "loss": 0.4013,
565
+ "rewards/accuracies": 0.737500011920929,
566
+ "rewards/chosen": -4.068535804748535,
567
+ "rewards/margins": 9.538484573364258,
568
+ "rewards/rejected": -13.607022285461426,
569
  "step": 400
570
  },
571
  {
572
+ "epoch": 0.86,
573
+ "learning_rate": 7.043713221597773e-07,
574
+ "logits/chosen": 0.06812890619039536,
575
+ "logits/rejected": -0.07965459674596786,
576
+ "logps/chosen": -683.1234130859375,
577
+ "logps/rejected": -1550.495361328125,
578
+ "loss": 0.4009,
579
+ "rewards/accuracies": 0.6812499761581421,
580
+ "rewards/chosen": -3.939258575439453,
581
+ "rewards/margins": 8.659662246704102,
582
+ "rewards/rejected": -12.598922729492188,
583
  "step": 410
584
  },
585
  {
586
+ "epoch": 0.88,
587
+ "learning_rate": 6.875296018047809e-07,
588
+ "logits/chosen": 0.20702295005321503,
589
+ "logits/rejected": 0.037614382803440094,
590
+ "logps/chosen": -662.0958251953125,
591
+ "logps/rejected": -1592.722900390625,
592
+ "loss": 0.3984,
593
+ "rewards/accuracies": 0.6875,
594
+ "rewards/chosen": -3.6876208782196045,
595
+ "rewards/margins": 9.383062362670898,
596
+ "rewards/rejected": -13.070683479309082,
597
  "step": 420
598
  },
599
  {
600
+ "epoch": 0.9,
601
+ "learning_rate": 6.704364923285857e-07,
602
+ "logits/chosen": 0.0521254763007164,
603
+ "logits/rejected": -0.16673153638839722,
604
+ "logps/chosen": -708.7855224609375,
605
+ "logps/rejected": -1829.0189208984375,
606
+ "loss": 0.398,
607
+ "rewards/accuracies": 0.7562500238418579,
608
+ "rewards/chosen": -3.7470862865448,
609
+ "rewards/margins": 11.305286407470703,
610
+ "rewards/rejected": -15.052372932434082,
611
  "step": 430
612
  },
613
  {
614
+ "epoch": 0.92,
615
+ "learning_rate": 6.531149075630796e-07,
616
+ "logits/chosen": 0.03881196305155754,
617
+ "logits/rejected": -0.04577777534723282,
618
+ "logps/chosen": -575.1758422851562,
619
+ "logps/rejected": -1710.799072265625,
620
+ "loss": 0.399,
621
+ "rewards/accuracies": 0.706250011920929,
622
+ "rewards/chosen": -2.906187057495117,
623
+ "rewards/margins": 11.424031257629395,
624
+ "rewards/rejected": -14.330218315124512,
625
  "step": 440
626
  },
627
  {
628
+ "epoch": 0.94,
629
+ "learning_rate": 6.355880676182085e-07,
630
+ "logits/chosen": 0.03127314895391464,
631
+ "logits/rejected": -0.32143911719322205,
632
+ "logps/chosen": -950.0020751953125,
633
+ "logps/rejected": -2152.05029296875,
634
+ "loss": 0.3881,
635
+ "rewards/accuracies": 0.7124999761581421,
636
+ "rewards/chosen": -6.0005059242248535,
637
+ "rewards/margins": 12.68049144744873,
638
+ "rewards/rejected": -18.68099594116211,
639
  "step": 450
640
  },
641
  {
642
+ "epoch": 0.96,
643
+ "learning_rate": 6.178794677547137e-07,
644
+ "logits/chosen": 0.10744090378284454,
645
+ "logits/rejected": -0.19111321866512299,
646
+ "logps/chosen": -911.0108642578125,
647
+ "logps/rejected": -2054.70556640625,
648
+ "loss": 0.3996,
649
+ "rewards/accuracies": 0.668749988079071,
650
+ "rewards/chosen": -6.130732536315918,
651
+ "rewards/margins": 11.758760452270508,
652
+ "rewards/rejected": -17.88949203491211,
653
  "step": 460
654
  },
655
  {
656
+ "epoch": 0.98,
657
+ "learning_rate": 6.000128468880222e-07,
658
+ "logits/chosen": -0.006098261568695307,
659
+ "logits/rejected": -0.45566266775131226,
660
+ "logps/chosen": -1054.670166015625,
661
+ "logps/rejected": -2545.79296875,
662
+ "loss": 0.3969,
663
+ "rewards/accuracies": 0.78125,
664
+ "rewards/chosen": -7.155210018157959,
665
+ "rewards/margins": 15.32789134979248,
666
+ "rewards/rejected": -22.48310089111328,
667
  "step": 470
668
  },
669
  {
670
+ "epoch": 1.0,
671
+ "learning_rate": 5.820121557655108e-07,
672
+ "logits/chosen": 0.051991622895002365,
673
+ "logits/rejected": -0.2006780356168747,
674
+ "logps/chosen": -758.8955078125,
675
+ "logps/rejected": -2237.24267578125,
676
+ "loss": 0.3949,
677
+ "rewards/accuracies": 0.78125,
678
+ "rewards/chosen": -4.22313928604126,
679
+ "rewards/margins": 14.878347396850586,
680
+ "rewards/rejected": -19.101486206054688,
681
  "step": 480
682
  },
683
  {
684
+ "epoch": 1.03,
685
+ "learning_rate": 5.639015248598023e-07,
686
+ "logits/chosen": -0.026292938739061356,
687
+ "logits/rejected": -0.274114191532135,
688
+ "logps/chosen": -778.0355224609375,
689
+ "logps/rejected": -1928.9573974609375,
690
+ "loss": 0.3625,
691
+ "rewards/accuracies": 0.7875000238418579,
692
+ "rewards/chosen": -4.59261417388916,
693
+ "rewards/margins": 11.645755767822266,
694
+ "rewards/rejected": -16.23836898803711,
695
  "step": 490
696
  },
697
  {
698
+ "epoch": 1.05,
699
+ "learning_rate": 5.457052320211339e-07,
700
+ "logits/chosen": -0.017996052280068398,
701
+ "logits/rejected": -0.45051470398902893,
702
+ "logps/chosen": -968.8033447265625,
703
+ "logps/rejected": -2387.48486328125,
704
+ "loss": 0.3659,
705
  "rewards/accuracies": 0.762499988079071,
706
+ "rewards/chosen": -6.734767913818359,
707
+ "rewards/margins": 14.207613945007324,
708
+ "rewards/rejected": -20.942384719848633,
709
  "step": 500
710
  },
711
  {
712
+ "epoch": 1.07,
713
+ "learning_rate": 5.274476699321637e-07,
714
+ "logits/chosen": 0.005720546934753656,
715
+ "logits/rejected": -0.5387086868286133,
716
+ "logps/chosen": -1205.0419921875,
717
+ "logps/rejected": -2745.4345703125,
718
+ "loss": 0.3687,
719
+ "rewards/accuracies": 0.8125,
720
+ "rewards/chosen": -8.98927116394043,
721
+ "rewards/margins": 15.691691398620605,
722
+ "rewards/rejected": -24.680959701538086,
723
  "step": 510
724
  },
725
  {
726
+ "epoch": 1.09,
727
+ "learning_rate": 5.091533134088387e-07,
728
+ "logits/chosen": -0.005288724787533283,
729
+ "logits/rejected": -0.1951751857995987,
730
+ "logps/chosen": -1033.7554931640625,
731
+ "logps/rejected": -2083.949462890625,
732
+ "loss": 0.3716,
733
+ "rewards/accuracies": 0.737500011920929,
734
+ "rewards/chosen": -6.909409523010254,
735
+ "rewards/margins": 10.849687576293945,
736
+ "rewards/rejected": -17.75909423828125,
737
  "step": 520
738
  },
739
  {
740
+ "epoch": 1.11,
741
+ "learning_rate": 4.908466865911614e-07,
742
+ "logits/chosen": 0.07674840837717056,
743
+ "logits/rejected": -0.22934529185295105,
744
+ "logps/chosen": -611.5888671875,
745
+ "logps/rejected": -1846.804443359375,
746
+ "loss": 0.3658,
747
+ "rewards/accuracies": 0.762499988079071,
748
+ "rewards/chosen": -2.9781932830810547,
749
+ "rewards/margins": 12.689587593078613,
750
+ "rewards/rejected": -15.667780876159668,
751
  "step": 530
752
  },
753
  {
754
+ "epoch": 1.13,
755
+ "learning_rate": 4.7255233006783624e-07,
756
+ "logits/chosen": 0.021334605291485786,
757
+ "logits/rejected": -0.3709332048892975,
758
+ "logps/chosen": -970.0914306640625,
759
+ "logps/rejected": -2093.108642578125,
760
+ "loss": 0.3666,
761
+ "rewards/accuracies": 0.75,
762
+ "rewards/chosen": -6.791567802429199,
763
+ "rewards/margins": 11.35092544555664,
764
+ "rewards/rejected": -18.142492294311523,
765
  "step": 540
766
  },
767
  {
768
+ "epoch": 1.15,
769
+ "learning_rate": 4.5429476797886617e-07,
770
+ "logits/chosen": 0.033512182533741,
771
+ "logits/rejected": -0.3936399519443512,
772
+ "logps/chosen": -856.0861206054688,
773
+ "logps/rejected": -2535.446044921875,
774
+ "loss": 0.3693,
775
+ "rewards/accuracies": 0.75,
776
+ "rewards/chosen": -5.2735795974731445,
777
+ "rewards/margins": 17.031352996826172,
778
+ "rewards/rejected": -22.304935455322266,
779
  "step": 550
780
  },
781
  {
782
+ "epoch": 1.17,
783
+ "learning_rate": 4.3609847514019763e-07,
784
+ "logits/chosen": -0.05847325176000595,
785
+ "logits/rejected": -0.5101506114006042,
786
+ "logps/chosen": -715.2965087890625,
787
+ "logps/rejected": -1808.1195068359375,
788
+ "loss": 0.3829,
789
  "rewards/accuracies": 0.793749988079071,
790
+ "rewards/chosen": -3.906663179397583,
791
+ "rewards/margins": 11.054914474487305,
792
+ "rewards/rejected": -14.961578369140625,
793
  "step": 560
794
  },
795
  {
796
+ "epoch": 1.19,
797
+ "learning_rate": 4.179878442344892e-07,
798
+ "logits/chosen": 0.03838271647691727,
799
+ "logits/rejected": -0.41066282987594604,
800
+ "logps/chosen": -600.6312866210938,
801
+ "logps/rejected": -1975.118408203125,
802
+ "loss": 0.3707,
803
+ "rewards/accuracies": 0.800000011920929,
804
+ "rewards/chosen": -3.174193859100342,
805
+ "rewards/margins": 13.602760314941406,
806
+ "rewards/rejected": -16.77695655822754,
807
  "step": 570
808
  },
809
  {
810
+ "epoch": 1.21,
811
+ "learning_rate": 3.9998715311197783e-07,
812
+ "logits/chosen": 0.00818496011197567,
813
+ "logits/rejected": -0.46959954500198364,
814
+ "logps/chosen": -1030.3143310546875,
815
+ "logps/rejected": -2606.57666015625,
816
+ "loss": 0.3699,
817
+ "rewards/accuracies": 0.768750011920929,
818
+ "rewards/chosen": -7.15012264251709,
819
+ "rewards/margins": 15.829042434692383,
820
+ "rewards/rejected": -22.979164123535156,
821
  "step": 580
822
  },
823
  {
824
+ "epoch": 1.24,
825
+ "learning_rate": 3.821205322452863e-07,
826
+ "logits/chosen": 0.0865226536989212,
827
+ "logits/rejected": -0.49088770151138306,
828
+ "logps/chosen": -979.1468505859375,
829
+ "logps/rejected": -2228.843017578125,
830
+ "loss": 0.3649,
831
+ "rewards/accuracies": 0.737500011920929,
832
+ "rewards/chosen": -6.757370948791504,
833
+ "rewards/margins": 12.640668869018555,
834
+ "rewards/rejected": -19.398040771484375,
835
  "step": 590
836
  },
837
  {
838
+ "epoch": 1.26,
839
+ "learning_rate": 3.6441193238179146e-07,
840
+ "logits/chosen": 0.06730663031339645,
841
+ "logits/rejected": -0.47154170274734497,
842
+ "logps/chosen": -628.3743896484375,
843
+ "logps/rejected": -2107.99169921875,
844
+ "loss": 0.359,
845
+ "rewards/accuracies": 0.824999988079071,
846
+ "rewards/chosen": -3.383096218109131,
847
+ "rewards/margins": 14.71300983428955,
848
+ "rewards/rejected": -18.096105575561523,
849
  "step": 600
850
  },
851
  {
852
+ "epoch": 1.28,
853
+ "learning_rate": 3.4688509243692034e-07,
854
+ "logits/chosen": -0.07551614940166473,
855
+ "logits/rejected": -0.4228256344795227,
856
+ "logps/chosen": -625.5606689453125,
857
+ "logps/rejected": -2476.503173828125,
858
+ "loss": 0.3736,
859
+ "rewards/accuracies": 0.768750011920929,
860
+ "rewards/chosen": -3.288256883621216,
861
+ "rewards/margins": 17.969881057739258,
862
+ "rewards/rejected": -21.258136749267578,
863
  "step": 610
864
  },
865
  {
866
+ "epoch": 1.3,
867
+ "learning_rate": 3.295635076714144e-07,
868
+ "logits/chosen": -0.012427730485796928,
869
+ "logits/rejected": -0.44272977113723755,
870
+ "logps/chosen": -558.3466796875,
871
+ "logps/rejected": -1993.5582275390625,
872
+ "loss": 0.3655,
873
+ "rewards/accuracies": 0.8062499761581421,
874
+ "rewards/chosen": -3.1298646926879883,
875
+ "rewards/margins": 14.332046508789062,
876
+ "rewards/rejected": -17.461912155151367,
877
  "step": 620
878
  },
879
  {
880
+ "epoch": 1.32,
881
+ "learning_rate": 3.12470398195219e-07,
882
+ "logits/chosen": -0.04589563235640526,
883
+ "logits/rejected": -0.566718339920044,
884
+ "logps/chosen": -788.1644287109375,
885
+ "logps/rejected": -2312.9814453125,
886
+ "loss": 0.3652,
887
+ "rewards/accuracies": 0.75,
888
+ "rewards/chosen": -4.818960189819336,
889
+ "rewards/margins": 14.982978820800781,
890
+ "rewards/rejected": -19.80194091796875,
891
  "step": 630
892
  },
893
  {
894
+ "epoch": 1.34,
895
+ "learning_rate": 2.956286778402226e-07,
896
+ "logits/chosen": -0.10815076529979706,
897
+ "logits/rejected": -0.40516456961631775,
898
+ "logps/chosen": -773.5537719726562,
899
+ "logps/rejected": -2297.0078125,
900
+ "loss": 0.3614,
901
+ "rewards/accuracies": 0.800000011920929,
902
+ "rewards/chosen": -3.9812560081481934,
903
+ "rewards/margins": 15.931207656860352,
904
+ "rewards/rejected": -19.912464141845703,
905
  "step": 640
906
  },
907
  {
908
+ "epoch": 1.36,
909
+ "learning_rate": 2.7906092344356826e-07,
910
+ "logits/chosen": 0.0947767049074173,
911
+ "logits/rejected": -0.34178268909454346,
912
+ "logps/chosen": -563.9888916015625,
913
+ "logps/rejected": -1955.478759765625,
914
+ "loss": 0.3597,
915
+ "rewards/accuracies": 0.8062499761581421,
916
+ "rewards/chosen": -2.7671079635620117,
917
+ "rewards/margins": 14.173640251159668,
918
+ "rewards/rejected": -16.940750122070312,
919
  "step": 650
920
  },
921
  {
922
+ "epoch": 1.38,
923
+ "learning_rate": 2.6278934458271996e-07,
924
+ "logits/chosen": -0.14527355134487152,
925
+ "logits/rejected": -0.5815786123275757,
926
+ "logps/chosen": -862.9083862304688,
927
+ "logps/rejected": -2731.31298828125,
928
+ "loss": 0.3611,
929
+ "rewards/accuracies": 0.762499988079071,
930
+ "rewards/chosen": -5.637027740478516,
931
+ "rewards/margins": 18.794246673583984,
932
+ "rewards/rejected": -24.4312744140625,
933
  "step": 660
934
  },
935
  {
936
+ "epoch": 1.4,
937
+ "learning_rate": 2.468357538028487e-07,
938
+ "logits/chosen": -0.026713650673627853,
939
+ "logits/rejected": -0.5581346750259399,
940
+ "logps/chosen": -826.4105224609375,
941
+ "logps/rejected": -2481.40087890625,
942
+ "loss": 0.3602,
943
  "rewards/accuracies": 0.768750011920929,
944
+ "rewards/chosen": -5.318304061889648,
945
+ "rewards/margins": 16.639951705932617,
946
+ "rewards/rejected": -21.958255767822266,
947
  "step": 670
948
  },
949
  {
950
+ "epoch": 1.42,
951
+ "learning_rate": 2.312215373764551e-07,
952
+ "logits/chosen": -0.17602002620697021,
953
+ "logits/rejected": -0.7885143756866455,
954
+ "logps/chosen": -1041.0810546875,
955
+ "logps/rejected": -2838.9697265625,
956
+ "loss": 0.3635,
957
+ "rewards/accuracies": 0.78125,
958
+ "rewards/chosen": -6.526537895202637,
959
+ "rewards/margins": 18.3980655670166,
960
+ "rewards/rejected": -24.924602508544922,
961
  "step": 680
962
  },
963
  {
964
+ "epoch": 1.44,
965
+ "learning_rate": 2.1596762663442213e-07,
966
+ "logits/chosen": -0.11598227918148041,
967
+ "logits/rejected": -0.8071243166923523,
968
+ "logps/chosen": -812.7769165039062,
969
+ "logps/rejected": -2409.599365234375,
970
+ "loss": 0.3651,
971
+ "rewards/accuracies": 0.7749999761581421,
972
+ "rewards/chosen": -5.32067346572876,
973
+ "rewards/margins": 16.22947120666504,
974
+ "rewards/rejected": -21.55014419555664,
975
  "step": 690
976
  },
977
  {
978
+ "epoch": 1.47,
979
+ "learning_rate": 2.0109446990692963e-07,
980
+ "logits/chosen": -0.1322178840637207,
981
+ "logits/rejected": -0.6750038862228394,
982
+ "logps/chosen": -735.38623046875,
983
+ "logps/rejected": -2221.10107421875,
984
+ "loss": 0.3629,
985
+ "rewards/accuracies": 0.793749988079071,
986
+ "rewards/chosen": -4.028614044189453,
987
+ "rewards/margins": 14.953264236450195,
988
+ "rewards/rejected": -18.98187828063965,
989
  "step": 700
990
  },
991
  {
992
+ "epoch": 1.49,
993
+ "learning_rate": 1.8662200511184872e-07,
994
+ "logits/chosen": -0.07118358463048935,
995
+ "logits/rejected": -0.6492079496383667,
996
+ "logps/chosen": -733.4075317382812,
997
+ "logps/rejected": -1978.872314453125,
998
+ "loss": 0.3605,
999
+ "rewards/accuracies": 0.8374999761581421,
1000
+ "rewards/chosen": -4.418785572052002,
1001
+ "rewards/margins": 12.647371292114258,
1002
+ "rewards/rejected": -17.0661563873291,
1003
  "step": 710
1004
  },
1005
  {
1006
+ "epoch": 1.51,
1007
+ "learning_rate": 1.725696330273575e-07,
1008
+ "logits/chosen": -0.013000762090086937,
1009
+ "logits/rejected": -0.4818207621574402,
1010
+ "logps/chosen": -668.6626586914062,
1011
+ "logps/rejected": -2029.7943115234375,
1012
+ "loss": 0.3526,
1013
+ "rewards/accuracies": 0.8125,
1014
+ "rewards/chosen": -3.4144835472106934,
1015
+ "rewards/margins": 13.840230941772461,
1016
+ "rewards/rejected": -17.25471305847168,
1017
  "step": 720
1018
  },
1019
  {
1020
+ "epoch": 1.53,
1021
+ "learning_rate": 1.589561912846089e-07,
1022
+ "logits/chosen": -0.08404045552015305,
1023
+ "logits/rejected": -0.6345758438110352,
1024
+ "logps/chosen": -747.2855224609375,
1025
+ "logps/rejected": -2472.15234375,
1026
+ "loss": 0.3523,
1027
+ "rewards/accuracies": 0.8125,
1028
+ "rewards/chosen": -4.540907859802246,
1029
+ "rewards/margins": 17.551443099975586,
1030
+ "rewards/rejected": -22.092350006103516,
1031
  "step": 730
1032
  },
1033
  {
1034
+ "epoch": 1.55,
1035
+ "learning_rate": 1.4579992911531496e-07,
1036
+ "logits/chosen": -0.21502713859081268,
1037
+ "logits/rejected": -0.8310446739196777,
1038
+ "logps/chosen": -1271.535888671875,
1039
+ "logps/rejected": -2498.06884765625,
1040
+ "loss": 0.3595,
1041
+ "rewards/accuracies": 0.7124999761581421,
1042
+ "rewards/chosen": -9.246122360229492,
1043
+ "rewards/margins": 12.74262809753418,
1044
+ "rewards/rejected": -21.988750457763672,
1045
  "step": 740
1046
  },
1047
  {
1048
+ "epoch": 1.57,
1049
+ "learning_rate": 1.3311848288809813e-07,
1050
+ "logits/chosen": -0.06439349055290222,
1051
+ "logits/rejected": -0.5617343187332153,
1052
+ "logps/chosen": -864.2130126953125,
1053
+ "logps/rejected": -2242.72119140625,
1054
+ "loss": 0.3647,
1055
+ "rewards/accuracies": 0.768750011920929,
1056
+ "rewards/chosen": -5.325442790985107,
1057
+ "rewards/margins": 14.041879653930664,
1058
+ "rewards/rejected": -19.367321014404297,
1059
  "step": 750
1060
  },
1061
  {
1062
+ "epoch": 1.59,
1063
+ "learning_rate": 1.209288524664029e-07,
1064
+ "logits/chosen": -0.007544988300651312,
1065
+ "logits/rejected": -0.6543707251548767,
1066
+ "logps/chosen": -929.5089721679688,
1067
+ "logps/rejected": -2724.5986328125,
1068
+ "loss": 0.3608,
1069
+ "rewards/accuracies": 0.768750011920929,
1070
+ "rewards/chosen": -5.293430805206299,
1071
+ "rewards/margins": 18.260942459106445,
1072
+ "rewards/rejected": -23.554372787475586,
1073
  "step": 760
1074
  },
1075
  {
1076
+ "epoch": 1.61,
1077
+ "learning_rate": 1.0924737841966497e-07,
1078
+ "logits/chosen": -0.09723073244094849,
1079
+ "logits/rejected": -0.6276915669441223,
1080
+ "logps/chosen": -1004.7063598632812,
1081
+ "logps/rejected": -2738.22119140625,
1082
+ "loss": 0.3611,
1083
+ "rewards/accuracies": 0.8374999761581421,
1084
+ "rewards/chosen": -6.353689193725586,
1085
+ "rewards/margins": 17.72109031677246,
1086
+ "rewards/rejected": -24.074779510498047,
1087
  "step": 770
1088
  },
1089
  {
1090
+ "epoch": 1.63,
1091
+ "learning_rate": 9.808972011828054e-08,
1092
+ "logits/chosen": -0.10224993526935577,
1093
+ "logits/rejected": -0.7376490831375122,
1094
+ "logps/chosen": -815.9857788085938,
1095
+ "logps/rejected": -2558.365478515625,
1096
+ "loss": 0.3546,
1097
+ "rewards/accuracies": 0.78125,
1098
+ "rewards/chosen": -5.3413801193237305,
1099
+ "rewards/margins": 17.04741859436035,
1100
+ "rewards/rejected": -22.388797760009766,
1101
  "step": 780
1102
  },
1103
  {
1104
+ "epoch": 1.65,
1105
+ "learning_rate": 8.747083474174527e-08,
1106
+ "logits/chosen": -0.06878294795751572,
1107
+ "logits/rejected": -0.6381550431251526,
1108
+ "logps/chosen": -684.91455078125,
1109
+ "logps/rejected": -2131.619140625,
1110
+ "loss": 0.3659,
1111
+ "rewards/accuracies": 0.800000011920929,
1112
+ "rewards/chosen": -3.895407199859619,
1113
+ "rewards/margins": 14.6422700881958,
1114
+ "rewards/rejected": -18.537677764892578,
1115
  "step": 790
1116
  },
1117
  {
1118
+ "epoch": 1.67,
1119
+ "learning_rate": 7.740495722810269e-08,
1120
+ "logits/chosen": -0.13722161948680878,
1121
+ "logits/rejected": -0.8848565220832825,
1122
+ "logps/chosen": -734.0003662109375,
1123
+ "logps/rejected": -2482.77490234375,
1124
+ "loss": 0.3646,
1125
+ "rewards/accuracies": 0.78125,
1126
+ "rewards/chosen": -4.181973934173584,
1127
+ "rewards/margins": 17.644306182861328,
1128
+ "rewards/rejected": -21.826278686523438,
1129
  "step": 800
1130
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1131
  {
1132
  "epoch": 1.7,
1133
+ "learning_rate": 6.790558119157597e-08,
1134
+ "logits/chosen": -0.0752606987953186,
1135
+ "logits/rejected": -0.6523066163063049,
1136
+ "logps/chosen": -913.1563720703125,
1137
+ "logps/rejected": -2458.097412109375,
1138
+ "loss": 0.3615,
1139
+ "rewards/accuracies": 0.78125,
1140
+ "rewards/chosen": -5.732936859130859,
1141
+ "rewards/margins": 15.887182235717773,
1142
+ "rewards/rejected": -21.620121002197266,
1143
+ "step": 810
1144
  },
1145
  {
1146
  "epoch": 1.72,
1147
+ "learning_rate": 5.898544083397e-08,
1148
+ "logits/chosen": 0.020809601992368698,
1149
+ "logits/rejected": -0.5849136114120483,
1150
+ "logps/chosen": -540.7110595703125,
1151
+ "logps/rejected": -2217.325927734375,
1152
+ "loss": 0.3579,
1153
+ "rewards/accuracies": 0.7562500238418579,
1154
+ "rewards/chosen": -2.4081716537475586,
1155
+ "rewards/margins": 16.857872009277344,
1156
+ "rewards/rejected": -19.266042709350586,
1157
+ "step": 820
1158
  },
1159
  {
1160
  "epoch": 1.74,
1161
+ "learning_rate": 5.065649387408705e-08,
1162
+ "logits/chosen": -0.09206173568964005,
1163
+ "logits/rejected": -0.7031819224357605,
1164
+ "logps/chosen": -1010.9982299804688,
1165
+ "logps/rejected": -2271.75537109375,
1166
+ "loss": 0.3578,
1167
+ "rewards/accuracies": 0.7562500238418579,
1168
+ "rewards/chosen": -6.834753513336182,
1169
+ "rewards/margins": 12.86634349822998,
1170
+ "rewards/rejected": -19.701095581054688,
1171
+ "step": 830
1172
  },
1173
  {
1174
  "epoch": 1.76,
1175
+ "learning_rate": 4.292990551804171e-08,
1176
+ "logits/chosen": -0.19090762734413147,
1177
+ "logits/rejected": -0.7296367883682251,
1178
+ "logps/chosen": -932.6066284179688,
1179
+ "logps/rejected": -2595.78759765625,
1180
+ "loss": 0.3589,
1181
+ "rewards/accuracies": 0.793749988079071,
1182
+ "rewards/chosen": -6.08009672164917,
1183
+ "rewards/margins": 16.833927154541016,
1184
+ "rewards/rejected": -22.914024353027344,
1185
+ "step": 840
1186
  },
1187
  {
1188
  "epoch": 1.78,
1189
+ "learning_rate": 3.581603349196371e-08,
1190
+ "logits/chosen": -0.07513806223869324,
1191
+ "logits/rejected": -0.844355583190918,
1192
+ "logps/chosen": -655.0567626953125,
1193
+ "logps/rejected": -2801.64501953125,
1194
+ "loss": 0.3561,
1195
+ "rewards/accuracies": 0.8125,
1196
+ "rewards/chosen": -3.282649517059326,
1197
+ "rewards/margins": 21.719741821289062,
1198
+ "rewards/rejected": -25.002391815185547,
1199
+ "step": 850
1200
  },
1201
  {
1202
  "epoch": 1.8,
1203
+ "learning_rate": 2.9324414157151367e-08,
1204
+ "logits/chosen": -0.12824216485023499,
1205
+ "logits/rejected": -0.7398759126663208,
1206
+ "logps/chosen": -818.0006103515625,
1207
+ "logps/rejected": -2734.663818359375,
1208
+ "loss": 0.3529,
1209
+ "rewards/accuracies": 0.8374999761581421,
1210
+ "rewards/chosen": -4.799277305603027,
1211
+ "rewards/margins": 19.707080841064453,
1212
+ "rewards/rejected": -24.506359100341797,
1213
+ "step": 860
1214
  },
1215
  {
1216
  "epoch": 1.82,
1217
+ "learning_rate": 2.3463749726290284e-08,
1218
+ "logits/chosen": -0.22140809893608093,
1219
+ "logits/rejected": -0.7506135702133179,
1220
+ "logps/chosen": -939.72900390625,
1221
+ "logps/rejected": -2747.75537109375,
1222
+ "loss": 0.3642,
1223
+ "rewards/accuracies": 0.7437499761581421,
1224
+ "rewards/chosen": -6.083466053009033,
1225
+ "rewards/margins": 18.29672622680664,
1226
+ "rewards/rejected": -24.380191802978516,
1227
+ "step": 870
1228
  },
1229
  {
1230
  "epoch": 1.84,
1231
+ "learning_rate": 1.824189659787284e-08,
1232
+ "logits/chosen": -0.051941704005002975,
1233
+ "logits/rejected": -0.5002130270004272,
1234
+ "logps/chosen": -670.8041381835938,
1235
+ "logps/rejected": -2170.00439453125,
1236
+ "loss": 0.3627,
1237
  "rewards/accuracies": 0.731249988079071,
1238
+ "rewards/chosen": -3.512953519821167,
1239
+ "rewards/margins": 15.10865306854248,
1240
+ "rewards/rejected": -18.621606826782227,
1241
+ "step": 880
1242
  },
1243
  {
1244
  "epoch": 1.86,
1245
+ "learning_rate": 1.3665854824458035e-08,
1246
+ "logits/chosen": -0.14072179794311523,
1247
+ "logits/rejected": -0.6090524792671204,
1248
+ "logps/chosen": -987.47119140625,
1249
+ "logps/rejected": -2067.42236328125,
1250
+ "loss": 0.3647,
1251
+ "rewards/accuracies": 0.762499988079071,
1252
+ "rewards/chosen": -6.492043972015381,
1253
+ "rewards/margins": 11.107647895812988,
1254
+ "rewards/rejected": -17.599689483642578,
1255
+ "step": 890
1256
  },
1257
  {
1258
  "epoch": 1.88,
1259
+ "learning_rate": 9.741758728888217e-09,
1260
+ "logits/chosen": -0.022175291553139687,
1261
+ "logits/rejected": -0.6860365271568298,
1262
+ "logps/chosen": -564.6074829101562,
1263
+ "logps/rejected": -2140.48681640625,
1264
+ "loss": 0.3545,
1265
+ "rewards/accuracies": 0.8187500238418579,
1266
+ "rewards/chosen": -2.2749757766723633,
1267
+ "rewards/margins": 15.992487907409668,
1268
+ "rewards/rejected": -18.2674617767334,
1269
+ "step": 900
1270
  },
1271
  {
1272
  "epoch": 1.91,
1273
+ "learning_rate": 6.474868681043577e-09,
1274
+ "logits/chosen": -0.09770497679710388,
1275
+ "logits/rejected": -0.611740231513977,
1276
+ "logps/chosen": -823.6361083984375,
1277
+ "logps/rejected": -2548.73388671875,
1278
+ "loss": 0.3561,
1279
+ "rewards/accuracies": 0.8062499761581421,
1280
+ "rewards/chosen": -5.1007561683654785,
1281
+ "rewards/margins": 17.229694366455078,
1282
+ "rewards/rejected": -22.330448150634766,
1283
+ "step": 910
1284
  },
1285
  {
1286
  "epoch": 1.93,
1287
+ "learning_rate": 3.869564046156459e-09,
1288
+ "logits/chosen": -0.051595211029052734,
1289
+ "logits/rejected": -0.6881163716316223,
1290
+ "logps/chosen": -818.4075927734375,
1291
+ "logps/rejected": -2222.89892578125,
1292
+ "loss": 0.3632,
1293
+ "rewards/accuracies": 0.7749999761581421,
1294
+ "rewards/chosen": -5.061829566955566,
1295
+ "rewards/margins": 14.042346000671387,
1296
+ "rewards/rejected": -19.104175567626953,
1297
+ "step": 920
1298
  },
1299
  {
1300
  "epoch": 1.95,
1301
+ "learning_rate": 1.929337314139412e-09,
1302
+ "logits/chosen": -0.2066664695739746,
1303
+ "logits/rejected": -0.7445483803749084,
1304
+ "logps/chosen": -808.8455200195312,
1305
+ "logps/rejected": -2086.377197265625,
1306
+ "loss": 0.3643,
1307
+ "rewards/accuracies": 0.7749999761581421,
1308
+ "rewards/chosen": -5.122731685638428,
1309
+ "rewards/margins": 12.991943359375,
1310
+ "rewards/rejected": -18.114675521850586,
1311
+ "step": 930
1312
  },
1313
  {
1314
  "epoch": 1.97,
1315
+ "learning_rate": 6.567894177967325e-10,
1316
+ "logits/chosen": 0.007280481047928333,
1317
+ "logits/rejected": -0.49907398223876953,
1318
+ "logps/chosen": -689.2498779296875,
1319
+ "logps/rejected": -2379.00048828125,
1320
+ "loss": 0.3578,
1321
+ "rewards/accuracies": 0.800000011920929,
1322
+ "rewards/chosen": -3.5881881713867188,
1323
+ "rewards/margins": 17.178848266601562,
1324
+ "rewards/rejected": -20.767038345336914,
1325
+ "step": 940
1326
  },
1327
  {
1328
  "epoch": 1.99,
1329
+ "learning_rate": 5.3626246194704575e-11,
1330
+ "logits/chosen": -0.05890023708343506,
1331
+ "logits/rejected": -0.6455780863761902,
1332
+ "logps/chosen": -604.0885009765625,
1333
+ "logps/rejected": -2217.527099609375,
1334
+ "loss": 0.366,
1335
+ "rewards/accuracies": 0.8125,
1336
+ "rewards/chosen": -3.149109363555908,
1337
+ "rewards/margins": 16.183589935302734,
1338
+ "rewards/rejected": -19.332698822021484,
1339
+ "step": 950
1340
  },
1341
  {
1342
  "epoch": 2.0,
1343
+ "step": 954,
1344
  "total_flos": 0.0,
1345
+ "train_loss": 0.39703809490243847,
1346
+ "train_runtime": 12665.7954,
1347
+ "train_samples_per_second": 9.653,
1348
+ "train_steps_per_second": 0.075
1349
  }
1350
  ],
1351
  "logging_steps": 10,
1352
+ "max_steps": 954,
1353
  "num_train_epochs": 2,
1354
  "save_steps": 10000,
1355
  "total_flos": 0.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:072f43def2f0cfd813f5885acb43461e138fab8cca306b121a33c9104652c789
3
  size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1397e8512176db34249f7cc8cdf6db6939284d3b90a77a3bb793ba3757e0ba4c
3
  size 6648