wzhouad commited on
Commit
6a5fd01
1 Parent(s): a9fbc59

Model save

Browse files
README.md CHANGED
@@ -37,7 +37,7 @@ The following hyperparameters were used during training:
37
  - learning_rate: 5e-07
38
  - train_batch_size: 8
39
  - eval_batch_size: 8
40
- - seed: 2
41
  - distributed_type: multi-GPU
42
  - num_devices: 8
43
  - gradient_accumulation_steps: 2
 
37
  - learning_rate: 5e-07
38
  - train_batch_size: 8
39
  - eval_batch_size: 8
40
+ - seed: 3
41
  - distributed_type: multi-GPU
42
  - num_devices: 8
43
  - gradient_accumulation_steps: 2
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.13281457475674005,
4
- "train_runtime": 3219.0486,
5
  "train_samples": 51894,
6
- "train_samples_per_second": 16.121,
7
  "train_steps_per_second": 0.126
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.13438091388455145,
4
+ "train_runtime": 3218.6044,
5
  "train_samples": 51894,
6
+ "train_samples_per_second": 16.123,
7
  "train_steps_per_second": 0.126
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab46e8124d9cacc43920ed6943b79f01ec0d55f715b8588dba66c609a2b1d4b
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8362a83dbb810604c9bafec85943233773008800e5be078eda7676e6a5c17d
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d07be1111d3b5bc2c14ffdaf4371c594ea83fb6a105d7bbceaeeb9d43d3f17b
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d89dd4954ae3d1cf6d18c20fe5c41eace0d5e2cdd13d0e454a7ba4e26f5a2774
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74f1ed244a41d859eb13abcea7c5eee6a043a8a2c39a674f91dbbfc0cf7e2a35
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3564f1a5524b5df57acdcc790c6cb97db2f4bf1e30dcaacd947fb7902ebd95fa
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.13281457475674005,
4
- "train_runtime": 3219.0486,
5
  "train_samples": 51894,
6
- "train_samples_per_second": 16.121,
7
  "train_steps_per_second": 0.126
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.13438091388455145,
4
+ "train_runtime": 3218.6044,
5
  "train_samples": 51894,
6
+ "train_samples_per_second": 16.123,
7
  "train_steps_per_second": 0.126
8
  }
trainer_state.json CHANGED
@@ -11,11 +11,11 @@
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.2195121951219512e-08,
14
- "logits/chosen": -2.8412017822265625,
15
- "logits/rejected": -2.798035144805908,
16
- "logps/chosen": -481.1488037109375,
17
- "logps/rejected": -89.30835723876953,
18
- "loss": 0.4176,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
@@ -25,570 +25,570 @@
25
  {
26
  "epoch": 0.02,
27
  "learning_rate": 1.219512195121951e-07,
28
- "logits/chosen": -2.760530948638916,
29
- "logits/rejected": -2.7219057083129883,
30
- "logps/chosen": -418.49639892578125,
31
- "logps/rejected": -116.74214935302734,
32
- "loss": 0.4186,
33
- "rewards/accuracies": 0.5486111044883728,
34
- "rewards/chosen": 0.0008742791833356023,
35
- "rewards/margins": 0.0011628220090642571,
36
- "rewards/rejected": -0.0002885429421439767,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.05,
41
  "learning_rate": 2.439024390243902e-07,
42
- "logits/chosen": -2.785912036895752,
43
- "logits/rejected": -2.7400879859924316,
44
- "logps/chosen": -395.93634033203125,
45
- "logps/rejected": -109.28425598144531,
46
- "loss": 0.4276,
47
- "rewards/accuracies": 0.737500011920929,
48
- "rewards/chosen": 0.014862718991935253,
49
- "rewards/margins": 0.029387209564447403,
50
- "rewards/rejected": -0.014524489641189575,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 3.6585365853658536e-07,
56
- "logits/chosen": -2.70774507522583,
57
- "logits/rejected": -2.6736245155334473,
58
- "logps/chosen": -375.3507385253906,
59
- "logps/rejected": -121.47686767578125,
60
- "loss": 0.4137,
61
  "rewards/accuracies": 0.7749999761581421,
62
- "rewards/chosen": 0.06373313069343567,
63
- "rewards/margins": 0.1794833242893219,
64
- "rewards/rejected": -0.11575017869472504,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 4.878048780487804e-07,
70
- "logits/chosen": -2.5397536754608154,
71
- "logits/rejected": -2.5152153968811035,
72
- "logps/chosen": -426.83306884765625,
73
- "logps/rejected": -164.6002960205078,
74
- "loss": 0.3837,
75
- "rewards/accuracies": 0.831250011920929,
76
- "rewards/chosen": 0.06933724880218506,
77
- "rewards/margins": 0.513908326625824,
78
- "rewards/rejected": -0.4445711076259613,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.12,
83
  "learning_rate": 4.992461696250783e-07,
84
- "logits/chosen": -2.425987958908081,
85
- "logits/rejected": -2.42197585105896,
86
- "logps/chosen": -366.08343505859375,
87
- "logps/rejected": -200.18582153320312,
88
- "loss": 0.3126,
89
- "rewards/accuracies": 0.7749999761581421,
90
- "rewards/chosen": -0.1581335961818695,
91
- "rewards/margins": 0.757983386516571,
92
- "rewards/rejected": -0.9161170721054077,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 4.966461721767899e-07,
98
- "logits/chosen": -2.3848328590393066,
99
- "logits/rejected": -2.298985719680786,
100
- "logps/chosen": -490.4847106933594,
101
- "logps/rejected": -254.928466796875,
102
- "loss": 0.2321,
103
- "rewards/accuracies": 0.8187500238418579,
104
- "rewards/chosen": -0.22150389850139618,
105
- "rewards/margins": 1.2847968339920044,
106
- "rewards/rejected": -1.506300926208496,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 4.922100518015975e-07,
112
- "logits/chosen": -2.340442180633545,
113
- "logits/rejected": -2.2820117473602295,
114
- "logps/chosen": -490.87615966796875,
115
- "logps/rejected": -321.3214111328125,
116
- "loss": 0.1643,
117
- "rewards/accuracies": 0.8062499761581421,
118
- "rewards/chosen": -0.6543983817100525,
119
- "rewards/margins": 1.5231101512908936,
120
- "rewards/rejected": -2.17750883102417,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.2,
125
  "learning_rate": 4.859708325770919e-07,
126
- "logits/chosen": -2.3740787506103516,
127
- "logits/rejected": -2.3113033771514893,
128
- "logps/chosen": -471.11651611328125,
129
- "logps/rejected": -314.3768005371094,
130
- "loss": 0.182,
131
- "rewards/accuracies": 0.762499988079071,
132
- "rewards/chosen": -0.38558533787727356,
133
- "rewards/margins": 1.7214374542236328,
134
- "rewards/rejected": -2.107023000717163,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.22,
139
  "learning_rate": 4.779749614980225e-07,
140
- "logits/chosen": -2.2898011207580566,
141
- "logits/rejected": -2.192032814025879,
142
- "logps/chosen": -496.12384033203125,
143
- "logps/rejected": -380.75909423828125,
144
- "loss": 0.1376,
145
- "rewards/accuracies": 0.793749988079071,
146
- "rewards/chosen": -0.8930786848068237,
147
- "rewards/margins": 1.819786787033081,
148
- "rewards/rejected": -2.7128653526306152,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.25,
153
  "learning_rate": 4.682819627081427e-07,
154
- "logits/chosen": -2.363102436065674,
155
- "logits/rejected": -2.2812604904174805,
156
- "logps/chosen": -489.04302978515625,
157
- "logps/rejected": -326.9526062011719,
158
- "loss": 0.1424,
159
- "rewards/accuracies": 0.8187500238418579,
160
- "rewards/chosen": -0.5512069463729858,
161
- "rewards/margins": 1.6478255987167358,
162
- "rewards/rejected": -2.1990325450897217,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 0.27,
167
  "learning_rate": 4.569639943810477e-07,
168
- "logits/chosen": -2.3185834884643555,
169
- "logits/rejected": -2.2058892250061035,
170
- "logps/chosen": -502.36529541015625,
171
- "logps/rejected": -393.0018615722656,
172
- "loss": 0.1052,
173
- "rewards/accuracies": 0.7875000238418579,
174
- "rewards/chosen": -0.6747262477874756,
175
- "rewards/margins": 2.091235399246216,
176
- "rewards/rejected": -2.7659618854522705,
177
  "step": 110
178
  },
179
  {
180
  "epoch": 0.3,
181
  "learning_rate": 4.4410531154874543e-07,
182
- "logits/chosen": -2.242546558380127,
183
- "logits/rejected": -2.098257541656494,
184
- "logps/chosen": -527.9982299804688,
185
- "logps/rejected": -425.9187927246094,
186
- "loss": 0.1028,
187
- "rewards/accuracies": 0.8062499761581421,
188
- "rewards/chosen": -1.0199496746063232,
189
- "rewards/margins": 2.1254653930664062,
190
- "rewards/rejected": -3.1454153060913086,
191
  "step": 120
192
  },
193
  {
194
  "epoch": 0.32,
195
  "learning_rate": 4.298016388768561e-07,
196
- "logits/chosen": -2.2639729976654053,
197
- "logits/rejected": -2.1410276889801025,
198
- "logps/chosen": -474.557373046875,
199
- "logps/rejected": -406.2401123046875,
200
- "loss": 0.102,
201
- "rewards/accuracies": 0.8812500238418579,
202
- "rewards/chosen": -0.7631456851959229,
203
- "rewards/margins": 2.1880364418029785,
204
- "rewards/rejected": -2.9511821269989014,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 0.35,
209
  "learning_rate": 4.1415945805573005e-07,
210
- "logits/chosen": -2.2353649139404297,
211
- "logits/rejected": -2.103256940841675,
212
- "logps/chosen": -494.5367126464844,
213
- "logps/rejected": -413.3063049316406,
214
- "loss": 0.109,
215
- "rewards/accuracies": 0.8125,
216
- "rewards/chosen": -0.6633724570274353,
217
- "rewards/margins": 2.3635334968566895,
218
- "rewards/rejected": -3.0269057750701904,
219
  "step": 140
220
  },
221
  {
222
  "epoch": 0.37,
223
  "learning_rate": 3.972952151123984e-07,
224
- "logits/chosen": -2.243504762649536,
225
- "logits/rejected": -2.1586227416992188,
226
- "logps/chosen": -485.9297790527344,
227
- "logps/rejected": -403.07904052734375,
228
- "loss": 0.0988,
229
- "rewards/accuracies": 0.8374999761581421,
230
- "rewards/chosen": -0.8084269762039185,
231
- "rewards/margins": 1.9387495517730713,
232
- "rewards/rejected": -2.7471766471862793,
233
  "step": 150
234
  },
235
  {
236
  "epoch": 0.39,
237
  "learning_rate": 3.793344535444142e-07,
238
- "logits/chosen": -2.2556967735290527,
239
- "logits/rejected": -2.1410129070281982,
240
- "logps/chosen": -517.8903198242188,
241
- "logps/rejected": -427.27178955078125,
242
- "loss": 0.1095,
243
- "rewards/accuracies": 0.8062499761581421,
244
- "rewards/chosen": -0.9526297450065613,
245
- "rewards/margins": 2.0250723361968994,
246
- "rewards/rejected": -2.9777019023895264,
247
  "step": 160
248
  },
249
  {
250
  "epoch": 0.42,
251
  "learning_rate": 3.604108797288461e-07,
252
- "logits/chosen": -2.2546138763427734,
253
- "logits/rejected": -2.162337064743042,
254
- "logps/chosen": -484.6184997558594,
255
- "logps/rejected": -358.60198974609375,
256
- "loss": 0.1069,
257
- "rewards/accuracies": 0.893750011920929,
258
- "rewards/chosen": -0.6528670787811279,
259
- "rewards/margins": 1.8891479969024658,
260
- "rewards/rejected": -2.5420150756835938,
261
  "step": 170
262
  },
263
  {
264
  "epoch": 0.44,
265
  "learning_rate": 3.40665367563858e-07,
266
- "logits/chosen": -2.199352264404297,
267
- "logits/rejected": -2.0996298789978027,
268
- "logps/chosen": -520.5743408203125,
269
- "logps/rejected": -414.6077575683594,
270
- "loss": 0.0923,
271
- "rewards/accuracies": 0.8062499761581421,
272
- "rewards/chosen": -0.9770752787590027,
273
- "rewards/margins": 1.9050334692001343,
274
- "rewards/rejected": -2.882108688354492,
275
  "step": 180
276
  },
277
  {
278
  "epoch": 0.47,
279
  "learning_rate": 3.202449097526798e-07,
280
- "logits/chosen": -2.2049007415771484,
281
- "logits/rejected": -2.0761523246765137,
282
- "logps/chosen": -545.6790771484375,
283
- "logps/rejected": -503.28326416015625,
284
- "loss": 0.0757,
285
- "rewards/accuracies": 0.84375,
286
- "rewards/chosen": -1.1332416534423828,
287
- "rewards/margins": 2.640087604522705,
288
- "rewards/rejected": -3.773329257965088,
289
  "step": 190
290
  },
291
  {
292
  "epoch": 0.49,
293
  "learning_rate": 2.993015235369905e-07,
294
- "logits/chosen": -2.251674175262451,
295
- "logits/rejected": -2.138532876968384,
296
- "logps/chosen": -552.5167236328125,
297
- "logps/rejected": -459.761962890625,
298
- "loss": 0.0843,
299
- "rewards/accuracies": 0.862500011920929,
300
- "rewards/chosen": -1.0592725276947021,
301
- "rewards/margins": 2.3839969635009766,
302
- "rewards/rejected": -3.443269729614258,
303
  "step": 200
304
  },
305
  {
306
  "epoch": 0.52,
307
  "learning_rate": 2.7799111902582693e-07,
308
- "logits/chosen": -2.216552495956421,
309
- "logits/rejected": -2.116063356399536,
310
- "logps/chosen": -539.1038818359375,
311
- "logps/rejected": -468.47869873046875,
312
- "loss": 0.0889,
313
- "rewards/accuracies": 0.8374999761581421,
314
- "rewards/chosen": -1.1180602312088013,
315
- "rewards/margins": 2.2188849449157715,
316
- "rewards/rejected": -3.336945056915283,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 0.54,
321
  "learning_rate": 2.564723385445869e-07,
322
- "logits/chosen": -2.182385206222534,
323
- "logits/rejected": -2.0952038764953613,
324
- "logps/chosen": -506.44970703125,
325
- "logps/rejected": -433.21954345703125,
326
- "loss": 0.0928,
327
- "rewards/accuracies": 0.84375,
328
- "rewards/chosen": -1.1999738216400146,
329
- "rewards/margins": 1.8092005252838135,
330
- "rewards/rejected": -3.00917387008667,
331
  "step": 220
332
  },
333
  {
334
  "epoch": 0.57,
335
  "learning_rate": 2.3490537564442845e-07,
336
- "logits/chosen": -2.2019972801208496,
337
- "logits/rejected": -2.1013846397399902,
338
- "logps/chosen": -493.3079528808594,
339
- "logps/rejected": -435.74884033203125,
340
- "loss": 0.0853,
341
- "rewards/accuracies": 0.862500011920929,
342
- "rewards/chosen": -1.0892283916473389,
343
- "rewards/margins": 2.1229333877563477,
344
- "rewards/rejected": -3.2121620178222656,
345
  "step": 230
346
  },
347
  {
348
  "epoch": 0.59,
349
  "learning_rate": 2.1345078256378801e-07,
350
- "logits/chosen": -2.2013192176818848,
351
- "logits/rejected": -2.0689620971679688,
352
- "logps/chosen": -503.4147033691406,
353
- "logps/rejected": -448.7828674316406,
354
- "loss": 0.08,
355
- "rewards/accuracies": 0.8374999761581421,
356
- "rewards/chosen": -1.28254234790802,
357
- "rewards/margins": 2.0458292961120605,
358
- "rewards/rejected": -3.32837176322937,
359
  "step": 240
360
  },
361
  {
362
  "epoch": 0.62,
363
  "learning_rate": 1.9226827501969865e-07,
364
- "logits/chosen": -2.2620677947998047,
365
- "logits/rejected": -2.1344215869903564,
366
- "logps/chosen": -535.8228149414062,
367
- "logps/rejected": -445.83349609375,
368
- "loss": 0.0772,
369
- "rewards/accuracies": 0.887499988079071,
370
- "rewards/chosen": -0.9264475107192993,
371
- "rewards/margins": 2.3875911235809326,
372
- "rewards/rejected": -3.3140385150909424,
373
  "step": 250
374
  },
375
  {
376
  "epoch": 0.64,
377
  "learning_rate": 1.715155432264775e-07,
378
- "logits/chosen": -2.2268154621124268,
379
- "logits/rejected": -2.149883270263672,
380
- "logps/chosen": -518.2362060546875,
381
- "logps/rejected": -444.4808654785156,
382
- "loss": 0.0781,
383
- "rewards/accuracies": 0.8062499761581421,
384
- "rewards/chosen": -1.0662428140640259,
385
- "rewards/margins": 2.1827731132507324,
386
- "rewards/rejected": -3.249016284942627,
387
  "step": 260
388
  },
389
  {
390
  "epoch": 0.67,
391
  "learning_rate": 1.51347077992983e-07,
392
- "logits/chosen": -2.2635481357574463,
393
- "logits/rejected": -2.1383774280548096,
394
- "logps/chosen": -510.13751220703125,
395
- "logps/rejected": -457.0772399902344,
396
- "loss": 0.0781,
397
- "rewards/accuracies": 0.84375,
398
- "rewards/chosen": -1.0047271251678467,
399
- "rewards/margins": 2.324492931365967,
400
- "rewards/rejected": -3.3292198181152344,
401
  "step": 270
402
  },
403
  {
404
  "epoch": 0.69,
405
  "learning_rate": 1.3191302063739906e-07,
406
- "logits/chosen": -2.2417304515838623,
407
- "logits/rejected": -2.121445417404175,
408
- "logps/chosen": -491.12139892578125,
409
- "logps/rejected": -425.1968688964844,
410
- "loss": 0.0805,
411
- "rewards/accuracies": 0.90625,
412
- "rewards/chosen": -0.8460060954093933,
413
- "rewards/margins": 2.316087007522583,
414
- "rewards/rejected": -3.162093162536621,
415
  "step": 280
416
  },
417
  {
418
  "epoch": 0.72,
419
  "learning_rate": 1.1335804528119475e-07,
420
- "logits/chosen": -2.2970786094665527,
421
- "logits/rejected": -2.17289662361145,
422
- "logps/chosen": -507.46661376953125,
423
- "logps/rejected": -449.6181640625,
424
- "loss": 0.0812,
425
- "rewards/accuracies": 0.8500000238418579,
426
- "rewards/chosen": -1.0554336309432983,
427
- "rewards/margins": 2.2787580490112305,
428
- "rewards/rejected": -3.3341917991638184,
429
  "step": 290
430
  },
431
  {
432
  "epoch": 0.74,
433
  "learning_rate": 9.582028184286423e-08,
434
- "logits/chosen": -2.2653486728668213,
435
- "logits/rejected": -2.1368870735168457,
436
- "logps/chosen": -563.6956787109375,
437
- "logps/rejected": -506.1378479003906,
438
- "loss": 0.0811,
439
- "rewards/accuracies": 0.8500000238418579,
440
- "rewards/chosen": -1.0197770595550537,
441
- "rewards/margins": 2.7816028594970703,
442
- "rewards/rejected": -3.801379680633545,
443
  "step": 300
444
  },
445
  {
446
  "epoch": 0.76,
447
  "learning_rate": 7.943028774907065e-08,
448
- "logits/chosen": -2.212477445602417,
449
- "logits/rejected": -2.083967685699463,
450
- "logps/chosen": -543.1881103515625,
451
- "logps/rejected": -489.3092346191406,
452
- "loss": 0.072,
453
  "rewards/accuracies": 0.8500000238418579,
454
- "rewards/chosen": -1.234198808670044,
455
- "rewards/margins": 2.4262847900390625,
456
- "rewards/rejected": -3.6604835987091064,
457
  "step": 310
458
  },
459
  {
460
  "epoch": 0.79,
461
  "learning_rate": 6.431007601814637e-08,
462
- "logits/chosen": -2.237189531326294,
463
- "logits/rejected": -2.118699550628662,
464
- "logps/chosen": -533.3051147460938,
465
- "logps/rejected": -482.6719665527344,
466
- "loss": 0.0764,
467
- "rewards/accuracies": 0.793749988079071,
468
- "rewards/chosen": -1.317209243774414,
469
- "rewards/margins": 2.3056979179382324,
470
- "rewards/rejected": -3.6229069232940674,
471
  "step": 320
472
  },
473
  {
474
  "epoch": 0.81,
475
  "learning_rate": 5.0572206951246e-08,
476
- "logits/chosen": -2.262988567352295,
477
- "logits/rejected": -2.143887996673584,
478
- "logps/chosen": -538.3980712890625,
479
- "logps/rejected": -492.5909118652344,
480
- "loss": 0.0884,
481
- "rewards/accuracies": 0.824999988079071,
482
- "rewards/chosen": -1.2063168287277222,
483
- "rewards/margins": 2.362534523010254,
484
- "rewards/rejected": -3.5688509941101074,
485
  "step": 330
486
  },
487
  {
488
  "epoch": 0.84,
489
  "learning_rate": 3.831895019292897e-08,
490
- "logits/chosen": -2.211160182952881,
491
- "logits/rejected": -2.0655343532562256,
492
- "logps/chosen": -495.141357421875,
493
- "logps/rejected": -464.81646728515625,
494
- "loss": 0.0744,
495
- "rewards/accuracies": 0.84375,
496
- "rewards/chosen": -1.0855491161346436,
497
- "rewards/margins": 2.5346646308898926,
498
- "rewards/rejected": -3.6202139854431152,
499
  "step": 340
500
  },
501
  {
502
  "epoch": 0.86,
503
  "learning_rate": 2.764152339909756e-08,
504
- "logits/chosen": -2.273918867111206,
505
- "logits/rejected": -2.128694534301758,
506
- "logps/chosen": -528.7555541992188,
507
- "logps/rejected": -469.3017578125,
508
- "loss": 0.0642,
509
- "rewards/accuracies": 0.8374999761581421,
510
- "rewards/chosen": -1.1073048114776611,
511
- "rewards/margins": 2.4064698219299316,
512
- "rewards/rejected": -3.513774871826172,
513
  "step": 350
514
  },
515
  {
516
  "epoch": 0.89,
517
  "learning_rate": 1.861941317991664e-08,
518
- "logits/chosen": -2.2403626441955566,
519
- "logits/rejected": -2.1174542903900146,
520
- "logps/chosen": -488.1507873535156,
521
- "logps/rejected": -457.27423095703125,
522
- "loss": 0.0756,
523
- "rewards/accuracies": 0.8687499761581421,
524
- "rewards/chosen": -1.1823718547821045,
525
- "rewards/margins": 2.2865800857543945,
526
- "rewards/rejected": -3.46895170211792,
527
  "step": 360
528
  },
529
  {
530
  "epoch": 0.91,
531
  "learning_rate": 1.13197833728636e-08,
532
- "logits/chosen": -2.2277872562408447,
533
- "logits/rejected": -2.0999319553375244,
534
- "logps/chosen": -529.3685302734375,
535
- "logps/rejected": -459.83551025390625,
536
- "loss": 0.0729,
537
- "rewards/accuracies": 0.856249988079071,
538
- "rewards/chosen": -1.064345121383667,
539
- "rewards/margins": 2.42720365524292,
540
- "rewards/rejected": -3.491549015045166,
541
  "step": 370
542
  },
543
  {
544
  "epoch": 0.94,
545
  "learning_rate": 5.79697505093521e-09,
546
- "logits/chosen": -2.161315679550171,
547
- "logits/rejected": -2.051104784011841,
548
- "logps/chosen": -518.0635986328125,
549
- "logps/rejected": -463.9219665527344,
550
- "loss": 0.0742,
551
- "rewards/accuracies": 0.856249988079071,
552
- "rewards/chosen": -1.2277439832687378,
553
- "rewards/margins": 2.3196287155151367,
554
- "rewards/rejected": -3.547372817993164,
555
  "step": 380
556
  },
557
  {
558
  "epoch": 0.96,
559
  "learning_rate": 2.092101988131256e-09,
560
- "logits/chosen": -2.2838692665100098,
561
- "logits/rejected": -2.1495554447174072,
562
- "logps/chosen": -561.1207275390625,
563
- "logps/rejected": -473.84234619140625,
564
- "loss": 0.07,
565
- "rewards/accuracies": 0.856249988079071,
566
- "rewards/chosen": -1.088555932044983,
567
- "rewards/margins": 2.4874186515808105,
568
- "rewards/rejected": -3.575974702835083,
569
  "step": 390
570
  },
571
  {
572
  "epoch": 0.99,
573
  "learning_rate": 2.327445937151673e-10,
574
- "logits/chosen": -2.214580774307251,
575
- "logits/rejected": -2.0943350791931152,
576
- "logps/chosen": -564.2920532226562,
577
- "logps/rejected": -481.778564453125,
578
- "loss": 0.0661,
579
- "rewards/accuracies": 0.8374999761581421,
580
- "rewards/chosen": -1.2679953575134277,
581
- "rewards/margins": 2.343169689178467,
582
- "rewards/rejected": -3.6111652851104736,
583
  "step": 400
584
  },
585
  {
586
  "epoch": 1.0,
587
  "step": 405,
588
  "total_flos": 0.0,
589
- "train_loss": 0.13281457475674005,
590
- "train_runtime": 3219.0486,
591
- "train_samples_per_second": 16.121,
592
  "train_steps_per_second": 0.126
593
  }
594
  ],
 
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.2195121951219512e-08,
14
+ "logits/chosen": -2.8088459968566895,
15
+ "logits/rejected": -2.7595884799957275,
16
+ "logps/chosen": -368.90777587890625,
17
+ "logps/rejected": -133.10202026367188,
18
+ "loss": 0.4545,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
 
25
  {
26
  "epoch": 0.02,
27
  "learning_rate": 1.219512195121951e-07,
28
+ "logits/chosen": -2.83878231048584,
29
+ "logits/rejected": -2.824958562850952,
30
+ "logps/chosen": -433.8194580078125,
31
+ "logps/rejected": -114.66372680664062,
32
+ "loss": 0.4227,
33
+ "rewards/accuracies": 0.5416666865348816,
34
+ "rewards/chosen": 0.0010460919002071023,
35
+ "rewards/margins": 0.0013696590904146433,
36
+ "rewards/rejected": -0.000323567190207541,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.05,
41
  "learning_rate": 2.439024390243902e-07,
42
+ "logits/chosen": -2.7982840538024902,
43
+ "logits/rejected": -2.7652382850646973,
44
+ "logps/chosen": -436.67694091796875,
45
+ "logps/rejected": -109.33970642089844,
46
+ "loss": 0.4254,
47
+ "rewards/accuracies": 0.7562500238418579,
48
+ "rewards/chosen": 0.020646633580327034,
49
+ "rewards/margins": 0.03670011833310127,
50
+ "rewards/rejected": -0.01605348475277424,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 3.6585365853658536e-07,
56
+ "logits/chosen": -2.717103958129883,
57
+ "logits/rejected": -2.6900384426116943,
58
+ "logps/chosen": -422.26702880859375,
59
+ "logps/rejected": -128.1683349609375,
60
+ "loss": 0.4108,
61
  "rewards/accuracies": 0.7749999761581421,
62
+ "rewards/chosen": 0.07094015926122665,
63
+ "rewards/margins": 0.20011821389198303,
64
+ "rewards/rejected": -0.12917804718017578,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.1,
69
  "learning_rate": 4.878048780487804e-07,
70
+ "logits/chosen": -2.590641498565674,
71
+ "logits/rejected": -2.5721707344055176,
72
+ "logps/chosen": -396.3973693847656,
73
+ "logps/rejected": -139.94859313964844,
74
+ "loss": 0.3881,
75
+ "rewards/accuracies": 0.762499988079071,
76
+ "rewards/chosen": 0.022975314408540726,
77
+ "rewards/margins": 0.4287249445915222,
78
+ "rewards/rejected": -0.4057496190071106,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.12,
83
  "learning_rate": 4.992461696250783e-07,
84
+ "logits/chosen": -2.42146635055542,
85
+ "logits/rejected": -2.394202709197998,
86
+ "logps/chosen": -445.91644287109375,
87
+ "logps/rejected": -205.5404052734375,
88
+ "loss": 0.316,
89
+ "rewards/accuracies": 0.800000011920929,
90
+ "rewards/chosen": -0.03603144362568855,
91
+ "rewards/margins": 0.8689195513725281,
92
+ "rewards/rejected": -0.9049509763717651,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 4.966461721767899e-07,
98
+ "logits/chosen": -2.417520046234131,
99
+ "logits/rejected": -2.3663182258605957,
100
+ "logps/chosen": -422.27215576171875,
101
+ "logps/rejected": -255.75912475585938,
102
+ "loss": 0.2661,
103
+ "rewards/accuracies": 0.7437499761581421,
104
+ "rewards/chosen": -0.35185474157333374,
105
+ "rewards/margins": 0.9545990228652954,
106
+ "rewards/rejected": -1.3064535856246948,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.17,
111
  "learning_rate": 4.922100518015975e-07,
112
+ "logits/chosen": -2.45034122467041,
113
+ "logits/rejected": -2.397273540496826,
114
+ "logps/chosen": -428.19207763671875,
115
+ "logps/rejected": -294.82501220703125,
116
+ "loss": 0.198,
117
+ "rewards/accuracies": 0.7875000238418579,
118
+ "rewards/chosen": -0.4432826638221741,
119
+ "rewards/margins": 1.3181250095367432,
120
+ "rewards/rejected": -1.7614076137542725,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.2,
125
  "learning_rate": 4.859708325770919e-07,
126
+ "logits/chosen": -2.3751111030578613,
127
+ "logits/rejected": -2.321465015411377,
128
+ "logps/chosen": -468.4130859375,
129
+ "logps/rejected": -331.666259765625,
130
+ "loss": 0.162,
131
+ "rewards/accuracies": 0.793749988079071,
132
+ "rewards/chosen": -0.7735603451728821,
133
+ "rewards/margins": 1.4863694906234741,
134
+ "rewards/rejected": -2.25993013381958,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.22,
139
  "learning_rate": 4.779749614980225e-07,
140
+ "logits/chosen": -2.381338596343994,
141
+ "logits/rejected": -2.327340602874756,
142
+ "logps/chosen": -520.724365234375,
143
+ "logps/rejected": -380.0218811035156,
144
+ "loss": 0.1464,
145
+ "rewards/accuracies": 0.8812500238418579,
146
+ "rewards/chosen": -0.4466208815574646,
147
+ "rewards/margins": 2.054797649383545,
148
+ "rewards/rejected": -2.5014188289642334,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.25,
153
  "learning_rate": 4.682819627081427e-07,
154
+ "logits/chosen": -2.3299832344055176,
155
+ "logits/rejected": -2.2486767768859863,
156
+ "logps/chosen": -477.24261474609375,
157
+ "logps/rejected": -372.49017333984375,
158
+ "loss": 0.1456,
159
+ "rewards/accuracies": 0.862500011920929,
160
+ "rewards/chosen": -0.6445478200912476,
161
+ "rewards/margins": 1.895777702331543,
162
+ "rewards/rejected": -2.54032564163208,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 0.27,
167
  "learning_rate": 4.569639943810477e-07,
168
+ "logits/chosen": -2.3097102642059326,
169
+ "logits/rejected": -2.226323127746582,
170
+ "logps/chosen": -495.50469970703125,
171
+ "logps/rejected": -389.80078125,
172
+ "loss": 0.1283,
173
+ "rewards/accuracies": 0.78125,
174
+ "rewards/chosen": -0.8834150433540344,
175
+ "rewards/margins": 1.8450326919555664,
176
+ "rewards/rejected": -2.728447675704956,
177
  "step": 110
178
  },
179
  {
180
  "epoch": 0.3,
181
  "learning_rate": 4.4410531154874543e-07,
182
+ "logits/chosen": -2.3541078567504883,
183
+ "logits/rejected": -2.2549960613250732,
184
+ "logps/chosen": -524.7901000976562,
185
+ "logps/rejected": -398.75775146484375,
186
+ "loss": 0.1283,
187
+ "rewards/accuracies": 0.78125,
188
+ "rewards/chosen": -0.7295175790786743,
189
+ "rewards/margins": 1.9527451992034912,
190
+ "rewards/rejected": -2.682262897491455,
191
  "step": 120
192
  },
193
  {
194
  "epoch": 0.32,
195
  "learning_rate": 4.298016388768561e-07,
196
+ "logits/chosen": -2.3804497718811035,
197
+ "logits/rejected": -2.2821872234344482,
198
+ "logps/chosen": -518.573974609375,
199
+ "logps/rejected": -398.14306640625,
200
+ "loss": 0.114,
201
+ "rewards/accuracies": 0.84375,
202
+ "rewards/chosen": -0.5337150692939758,
203
+ "rewards/margins": 2.2159152030944824,
204
+ "rewards/rejected": -2.7496302127838135,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 0.35,
209
  "learning_rate": 4.1415945805573005e-07,
210
+ "logits/chosen": -2.309293270111084,
211
+ "logits/rejected": -2.2271227836608887,
212
+ "logps/chosen": -486.838623046875,
213
+ "logps/rejected": -373.0490417480469,
214
+ "loss": 0.1246,
215
+ "rewards/accuracies": 0.862500011920929,
216
+ "rewards/chosen": -0.6586702466011047,
217
+ "rewards/margins": 1.7459022998809814,
218
+ "rewards/rejected": -2.4045722484588623,
219
  "step": 140
220
  },
221
  {
222
  "epoch": 0.37,
223
  "learning_rate": 3.972952151123984e-07,
224
+ "logits/chosen": -2.288892984390259,
225
+ "logits/rejected": -2.1915061473846436,
226
+ "logps/chosen": -450.01556396484375,
227
+ "logps/rejected": -368.2213439941406,
228
+ "loss": 0.1152,
229
+ "rewards/accuracies": 0.856249988079071,
230
+ "rewards/chosen": -0.7529748678207397,
231
+ "rewards/margins": 1.9199845790863037,
232
+ "rewards/rejected": -2.672959566116333,
233
  "step": 150
234
  },
235
  {
236
  "epoch": 0.39,
237
  "learning_rate": 3.793344535444142e-07,
238
+ "logits/chosen": -2.2575857639312744,
239
+ "logits/rejected": -2.1550350189208984,
240
+ "logps/chosen": -547.2183837890625,
241
+ "logps/rejected": -409.57989501953125,
242
+ "loss": 0.088,
243
+ "rewards/accuracies": 0.8125,
244
+ "rewards/chosen": -0.8003584146499634,
245
+ "rewards/margins": 2.1838386058807373,
246
+ "rewards/rejected": -2.9841971397399902,
247
  "step": 160
248
  },
249
  {
250
  "epoch": 0.42,
251
  "learning_rate": 3.604108797288461e-07,
252
+ "logits/chosen": -2.2742323875427246,
253
+ "logits/rejected": -2.167198419570923,
254
+ "logps/chosen": -547.2274169921875,
255
+ "logps/rejected": -456.614501953125,
256
+ "loss": 0.0776,
257
+ "rewards/accuracies": 0.862500011920929,
258
+ "rewards/chosen": -1.0825190544128418,
259
+ "rewards/margins": 2.3789236545562744,
260
+ "rewards/rejected": -3.4614429473876953,
261
  "step": 170
262
  },
263
  {
264
  "epoch": 0.44,
265
  "learning_rate": 3.40665367563858e-07,
266
+ "logits/chosen": -2.2402544021606445,
267
+ "logits/rejected": -2.1346538066864014,
268
+ "logps/chosen": -564.0145263671875,
269
+ "logps/rejected": -489.21160888671875,
270
+ "loss": 0.0697,
271
+ "rewards/accuracies": 0.7749999761581421,
272
+ "rewards/chosen": -1.539156198501587,
273
+ "rewards/margins": 2.1975486278533936,
274
+ "rewards/rejected": -3.7367050647735596,
275
  "step": 180
276
  },
277
  {
278
  "epoch": 0.47,
279
  "learning_rate": 3.202449097526798e-07,
280
+ "logits/chosen": -2.3025131225585938,
281
+ "logits/rejected": -2.224256992340088,
282
+ "logps/chosen": -505.39520263671875,
283
+ "logps/rejected": -423.83026123046875,
284
+ "loss": 0.0811,
285
+ "rewards/accuracies": 0.831250011920929,
286
+ "rewards/chosen": -1.0325360298156738,
287
+ "rewards/margins": 2.132319927215576,
288
+ "rewards/rejected": -3.16485595703125,
289
  "step": 190
290
  },
291
  {
292
  "epoch": 0.49,
293
  "learning_rate": 2.993015235369905e-07,
294
+ "logits/chosen": -2.3023552894592285,
295
+ "logits/rejected": -2.2043874263763428,
296
+ "logps/chosen": -525.6875610351562,
297
+ "logps/rejected": -416.1629333496094,
298
+ "loss": 0.0979,
299
+ "rewards/accuracies": 0.856249988079071,
300
+ "rewards/chosen": -0.8641435503959656,
301
+ "rewards/margins": 2.118994951248169,
302
+ "rewards/rejected": -2.9831383228302,
303
  "step": 200
304
  },
305
  {
306
  "epoch": 0.52,
307
  "learning_rate": 2.7799111902582693e-07,
308
+ "logits/chosen": -2.3067820072174072,
309
+ "logits/rejected": -2.2110161781311035,
310
+ "logps/chosen": -492.69927978515625,
311
+ "logps/rejected": -381.31878662109375,
312
+ "loss": 0.0872,
313
+ "rewards/accuracies": 0.793749988079071,
314
+ "rewards/chosen": -0.9221334457397461,
315
+ "rewards/margins": 1.870031714439392,
316
+ "rewards/rejected": -2.7921650409698486,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 0.54,
321
  "learning_rate": 2.564723385445869e-07,
322
+ "logits/chosen": -2.3406167030334473,
323
+ "logits/rejected": -2.2510488033294678,
324
+ "logps/chosen": -520.8443603515625,
325
+ "logps/rejected": -442.00732421875,
326
+ "loss": 0.0908,
327
+ "rewards/accuracies": 0.8062499761581421,
328
+ "rewards/chosen": -1.0322405099868774,
329
+ "rewards/margins": 2.144731044769287,
330
+ "rewards/rejected": -3.176971912384033,
331
  "step": 220
332
  },
333
  {
334
  "epoch": 0.57,
335
  "learning_rate": 2.3490537564442845e-07,
336
+ "logits/chosen": -2.284823179244995,
337
+ "logits/rejected": -2.1653401851654053,
338
+ "logps/chosen": -511.96929931640625,
339
+ "logps/rejected": -426.46356201171875,
340
+ "loss": 0.0967,
341
+ "rewards/accuracies": 0.793749988079071,
342
+ "rewards/chosen": -1.2002372741699219,
343
+ "rewards/margins": 1.9990075826644897,
344
+ "rewards/rejected": -3.199244976043701,
345
  "step": 230
346
  },
347
  {
348
  "epoch": 0.59,
349
  "learning_rate": 2.1345078256378801e-07,
350
+ "logits/chosen": -2.321927547454834,
351
+ "logits/rejected": -2.215357780456543,
352
+ "logps/chosen": -495.8760681152344,
353
+ "logps/rejected": -439.46282958984375,
354
+ "loss": 0.0955,
355
+ "rewards/accuracies": 0.862500011920929,
356
+ "rewards/chosen": -0.8706371188163757,
357
+ "rewards/margins": 2.3429722785949707,
358
+ "rewards/rejected": -3.213609218597412,
359
  "step": 240
360
  },
361
  {
362
  "epoch": 0.62,
363
  "learning_rate": 1.9226827501969865e-07,
364
+ "logits/chosen": -2.3428966999053955,
365
+ "logits/rejected": -2.2573530673980713,
366
+ "logps/chosen": -526.4675903320312,
367
+ "logps/rejected": -451.949462890625,
368
+ "loss": 0.096,
369
+ "rewards/accuracies": 0.8374999761581421,
370
+ "rewards/chosen": -0.8379364013671875,
371
+ "rewards/margins": 2.499549627304077,
372
+ "rewards/rejected": -3.3374857902526855,
373
  "step": 250
374
  },
375
  {
376
  "epoch": 0.64,
377
  "learning_rate": 1.715155432264775e-07,
378
+ "logits/chosen": -2.3556008338928223,
379
+ "logits/rejected": -2.2766494750976562,
380
+ "logps/chosen": -516.3786010742188,
381
+ "logps/rejected": -430.13916015625,
382
+ "loss": 0.0857,
383
+ "rewards/accuracies": 0.875,
384
+ "rewards/chosen": -0.8434340357780457,
385
+ "rewards/margins": 2.294442653656006,
386
+ "rewards/rejected": -3.1378769874572754,
387
  "step": 260
388
  },
389
  {
390
  "epoch": 0.67,
391
  "learning_rate": 1.51347077992983e-07,
392
+ "logits/chosen": -2.3460044860839844,
393
+ "logits/rejected": -2.281031370162964,
394
+ "logps/chosen": -490.55078125,
395
+ "logps/rejected": -423.6560974121094,
396
+ "loss": 0.0821,
397
+ "rewards/accuracies": 0.856249988079071,
398
+ "rewards/chosen": -0.8685197830200195,
399
+ "rewards/margins": 2.1445822715759277,
400
+ "rewards/rejected": -3.0131022930145264,
401
  "step": 270
402
  },
403
  {
404
  "epoch": 0.69,
405
  "learning_rate": 1.3191302063739906e-07,
406
+ "logits/chosen": -2.2882773876190186,
407
+ "logits/rejected": -2.218071699142456,
408
+ "logps/chosen": -500.769287109375,
409
+ "logps/rejected": -446.246826171875,
410
+ "loss": 0.0712,
411
+ "rewards/accuracies": 0.8187500238418579,
412
+ "rewards/chosen": -1.2157343626022339,
413
+ "rewards/margins": 2.1158077716827393,
414
+ "rewards/rejected": -3.3315422534942627,
415
  "step": 280
416
  },
417
  {
418
  "epoch": 0.72,
419
  "learning_rate": 1.1335804528119475e-07,
420
+ "logits/chosen": -2.3649039268493652,
421
+ "logits/rejected": -2.252676486968994,
422
+ "logps/chosen": -540.1212158203125,
423
+ "logps/rejected": -467.2939453125,
424
+ "loss": 0.0686,
425
+ "rewards/accuracies": 0.8687499761581421,
426
+ "rewards/chosen": -1.0436217784881592,
427
+ "rewards/margins": 2.6221861839294434,
428
+ "rewards/rejected": -3.6658082008361816,
429
  "step": 290
430
  },
431
  {
432
  "epoch": 0.74,
433
  "learning_rate": 9.582028184286423e-08,
434
+ "logits/chosen": -2.243900775909424,
435
+ "logits/rejected": -2.1746292114257812,
436
+ "logps/chosen": -503.1402282714844,
437
+ "logps/rejected": -486.1592712402344,
438
+ "loss": 0.0686,
439
+ "rewards/accuracies": 0.84375,
440
+ "rewards/chosen": -1.4328491687774658,
441
+ "rewards/margins": 2.2128751277923584,
442
+ "rewards/rejected": -3.6457245349884033,
443
  "step": 300
444
  },
445
  {
446
  "epoch": 0.76,
447
  "learning_rate": 7.943028774907065e-08,
448
+ "logits/chosen": -2.2528328895568848,
449
+ "logits/rejected": -2.170386791229248,
450
+ "logps/chosen": -501.7100524902344,
451
+ "logps/rejected": -471.88897705078125,
452
+ "loss": 0.0689,
453
  "rewards/accuracies": 0.8500000238418579,
454
+ "rewards/chosen": -1.172387719154358,
455
+ "rewards/margins": 2.3613522052764893,
456
+ "rewards/rejected": -3.533740282058716,
457
  "step": 310
458
  },
459
  {
460
  "epoch": 0.79,
461
  "learning_rate": 6.431007601814637e-08,
462
+ "logits/chosen": -2.258288860321045,
463
+ "logits/rejected": -2.1915061473846436,
464
+ "logps/chosen": -471.57330322265625,
465
+ "logps/rejected": -461.84417724609375,
466
+ "loss": 0.0601,
467
+ "rewards/accuracies": 0.8187500238418579,
468
+ "rewards/chosen": -1.4386770725250244,
469
+ "rewards/margins": 2.1069023609161377,
470
+ "rewards/rejected": -3.545579433441162,
471
  "step": 320
472
  },
473
  {
474
  "epoch": 0.81,
475
  "learning_rate": 5.0572206951246e-08,
476
+ "logits/chosen": -2.2368595600128174,
477
+ "logits/rejected": -2.1402342319488525,
478
+ "logps/chosen": -522.8599853515625,
479
+ "logps/rejected": -482.84893798828125,
480
+ "loss": 0.0626,
481
+ "rewards/accuracies": 0.78125,
482
+ "rewards/chosen": -1.5531214475631714,
483
+ "rewards/margins": 2.2123360633850098,
484
+ "rewards/rejected": -3.7654571533203125,
485
  "step": 330
486
  },
487
  {
488
  "epoch": 0.84,
489
  "learning_rate": 3.831895019292897e-08,
490
+ "logits/chosen": -2.308152675628662,
491
+ "logits/rejected": -2.2120919227600098,
492
+ "logps/chosen": -565.0369873046875,
493
+ "logps/rejected": -535.8488159179688,
494
+ "loss": 0.0642,
495
+ "rewards/accuracies": 0.831250011920929,
496
+ "rewards/chosen": -1.306236982345581,
497
+ "rewards/margins": 2.8749289512634277,
498
+ "rewards/rejected": -4.181166172027588,
499
  "step": 340
500
  },
501
  {
502
  "epoch": 0.86,
503
  "learning_rate": 2.764152339909756e-08,
504
+ "logits/chosen": -2.245577573776245,
505
+ "logits/rejected": -2.1435444355010986,
506
+ "logps/chosen": -546.0943603515625,
507
+ "logps/rejected": -454.5082092285156,
508
+ "loss": 0.0636,
509
+ "rewards/accuracies": 0.8500000238418579,
510
+ "rewards/chosen": -1.2082496881484985,
511
+ "rewards/margins": 2.3495194911956787,
512
+ "rewards/rejected": -3.5577690601348877,
513
  "step": 350
514
  },
515
  {
516
  "epoch": 0.89,
517
  "learning_rate": 1.861941317991664e-08,
518
+ "logits/chosen": -2.302865505218506,
519
+ "logits/rejected": -2.1724164485931396,
520
+ "logps/chosen": -559.6376953125,
521
+ "logps/rejected": -483.40771484375,
522
+ "loss": 0.0675,
523
+ "rewards/accuracies": 0.875,
524
+ "rewards/chosen": -1.0334274768829346,
525
+ "rewards/margins": 2.655003070831299,
526
+ "rewards/rejected": -3.6884307861328125,
527
  "step": 360
528
  },
529
  {
530
  "epoch": 0.91,
531
  "learning_rate": 1.13197833728636e-08,
532
+ "logits/chosen": -2.2556536197662354,
533
+ "logits/rejected": -2.153872013092041,
534
+ "logps/chosen": -521.9984130859375,
535
+ "logps/rejected": -505.71673583984375,
536
+ "loss": 0.06,
537
+ "rewards/accuracies": 0.8812500238418579,
538
+ "rewards/chosen": -1.1932189464569092,
539
+ "rewards/margins": 2.7444043159484863,
540
+ "rewards/rejected": -3.9376235008239746,
541
  "step": 370
542
  },
543
  {
544
  "epoch": 0.94,
545
  "learning_rate": 5.79697505093521e-09,
546
+ "logits/chosen": -2.2588906288146973,
547
+ "logits/rejected": -2.159388303756714,
548
+ "logps/chosen": -529.9054565429688,
549
+ "logps/rejected": -461.11700439453125,
550
+ "loss": 0.0744,
551
+ "rewards/accuracies": 0.7749999761581421,
552
+ "rewards/chosen": -1.276084065437317,
553
+ "rewards/margins": 2.2816543579101562,
554
+ "rewards/rejected": -3.5577385425567627,
555
  "step": 380
556
  },
557
  {
558
  "epoch": 0.96,
559
  "learning_rate": 2.092101988131256e-09,
560
+ "logits/chosen": -2.313697099685669,
561
+ "logits/rejected": -2.171175003051758,
562
+ "logps/chosen": -565.225830078125,
563
+ "logps/rejected": -489.6360778808594,
564
+ "loss": 0.0609,
565
+ "rewards/accuracies": 0.893750011920929,
566
+ "rewards/chosen": -1.0208733081817627,
567
+ "rewards/margins": 2.784264087677002,
568
+ "rewards/rejected": -3.8051371574401855,
569
  "step": 390
570
  },
571
  {
572
  "epoch": 0.99,
573
  "learning_rate": 2.327445937151673e-10,
574
+ "logits/chosen": -2.29669189453125,
575
+ "logits/rejected": -2.1986515522003174,
576
+ "logps/chosen": -561.0698852539062,
577
+ "logps/rejected": -510.22021484375,
578
+ "loss": 0.0666,
579
+ "rewards/accuracies": 0.875,
580
+ "rewards/chosen": -1.1245156526565552,
581
+ "rewards/margins": 2.6927759647369385,
582
+ "rewards/rejected": -3.817291736602783,
583
  "step": 400
584
  },
585
  {
586
  "epoch": 1.0,
587
  "step": 405,
588
  "total_flos": 0.0,
589
+ "train_loss": 0.13438091388455145,
590
+ "train_runtime": 3218.6044,
591
+ "train_samples_per_second": 16.123,
592
  "train_steps_per_second": 0.126
593
  }
594
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34facb59b2833ff9a65d1ea6ca0671f7143189081be77d079ad67a7343d5aa7d
3
  size 5944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d05f2ea4a8f27ac4989592d034e456f8fe99958c58d076bd3ccb965c582e16a
3
  size 5944