jikaixuan commited on
Commit
6b1b603
1 Parent(s): 3c9b215

Model save

Browse files
README.md CHANGED
@@ -2,16 +2,11 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - dpo
8
- - generated_from_trainer
9
  - trl
10
  - dpo
 
11
  - generated_from_trainer
12
  base_model: mistralai/Mistral-7B-v0.1
13
- datasets:
14
- - HuggingFaceH4/ultrafeedback_binarized
15
  model-index:
16
  - name: zephyr-7b
17
  results: []
@@ -22,17 +17,17 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # zephyr-7b
24
 
25
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
26
  It achieves the following results on the evaluation set:
27
- - Loss: 0.6171
28
- - Rewards/chosen: -0.4648
29
- - Rewards/rejected: -0.8388
30
  - Rewards/accuracies: 0.3711
31
- - Rewards/margins: 0.3740
32
- - Logps/rejected: -161.0705
33
- - Logps/chosen: -110.3948
34
- - Logits/rejected: 1.0411
35
- - Logits/chosen: 0.9868
36
  - Use Label: 0.0
37
  - Pred Label: 0.0
38
 
@@ -71,10 +66,10 @@ The following hyperparameters were used during training:
71
 
72
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
73
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
74
- | 0.6553 | 0.21 | 100 | 0.6557 | -0.1267 | -0.2685 | 0.3633 | 0.1419 | -104.0477 | -76.5787 | -2.0726 | -2.0833 | 0.0 | 0.0 |
75
- | 0.6446 | 0.42 | 200 | 0.6343 | -0.2873 | -0.5376 | 0.3828 | 0.2503 | -130.9503 | -92.6377 | -0.6864 | -0.7124 | 0.0 | 0.0 |
76
- | 0.6273 | 0.63 | 300 | 0.6204 | -0.4623 | -0.7994 | 0.3672 | 0.3371 | -157.1332 | -110.1469 | 0.6726 | 0.6280 | 0.0 | 0.0 |
77
- | 0.6165 | 0.84 | 400 | 0.6182 | -0.4457 | -0.8122 | 0.3672 | 0.3666 | -158.4149 | -108.4784 | 0.9580 | 0.9035 | 0.0 | 0.0 |
78
 
79
 
80
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
 
 
 
5
  - trl
6
  - dpo
7
+ - alignment-handbook
8
  - generated_from_trainer
9
  base_model: mistralai/Mistral-7B-v0.1
 
 
10
  model-index:
11
  - name: zephyr-7b
12
  results: []
 
17
 
18
  # zephyr-7b
19
 
20
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.6157
23
+ - Rewards/chosen: -0.4865
24
+ - Rewards/rejected: -0.8500
25
  - Rewards/accuracies: 0.3711
26
+ - Rewards/margins: 0.3636
27
+ - Logps/rejected: -162.1976
28
+ - Logps/chosen: -112.5605
29
+ - Logits/rejected: 1.5453
30
+ - Logits/chosen: 1.4533
31
  - Use Label: 0.0
32
  - Pred Label: 0.0
33
 
 
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
68
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
69
+ | 0.6551 | 0.21 | 100 | 0.6526 | -0.2364 | -0.3728 | 0.3359 | 0.1364 | -114.4721 | -87.5525 | -1.7460 | -1.7620 | 0.0 | 0.0 |
70
+ | 0.6376 | 0.42 | 200 | 0.6289 | -0.3405 | -0.6072 | 0.3672 | 0.2667 | -137.9142 | -97.9614 | 0.0432 | -0.0238 | 0.0 | 0.0 |
71
+ | 0.6196 | 0.63 | 300 | 0.6189 | -0.3871 | -0.7293 | 0.375 | 0.3422 | -150.1250 | -102.6218 | 1.1831 | 1.0945 | 0.0 | 0.0 |
72
+ | 0.6139 | 0.84 | 400 | 0.6157 | -0.4865 | -0.8500 | 0.3711 | 0.3636 | -162.1976 | -112.5605 | 1.5453 | 1.4533 | 0.0 | 0.0 |
73
 
74
 
75
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2e1f0bfb0d758cdedb2cd45f7662f8bc813716b3404510234c43038042c9886
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28859559ead1b89c8a8bb9f09b1b677412e14c5c17bfd4811e08be08821f0e55
3
  size 671150064
all_results.json CHANGED
@@ -15,9 +15,9 @@
15
  "eval_samples_per_second": 15.947,
16
  "eval_steps_per_second": 0.255,
17
  "eval_use_label": 0.0,
18
- "train_loss": 0.6389844682481554,
19
- "train_runtime": 9615.2592,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 6.358,
22
  "train_steps_per_second": 0.05
23
  }
 
15
  "eval_samples_per_second": 15.947,
16
  "eval_steps_per_second": 0.255,
17
  "eval_use_label": 0.0,
18
+ "train_loss": 0.6357159084743924,
19
+ "train_runtime": 9601.7268,
20
  "train_samples": 61135,
21
+ "train_samples_per_second": 6.367,
22
  "train_steps_per_second": 0.05
23
  }
runs/Mar20_15-13-30_uclaml04.cs.ucla.edu/events.out.tfevents.1710972859.uclaml04.cs.ucla.edu.3989465.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:888fe7d2c19e3333a49dacd02fa3417075bbcd270b0229a62979815675a95051
3
- size 45086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e73d6b9e5ad9da8999138eb89142dd7eb3d77188a523892441b841aab3d42a2f
3
+ size 47034
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6389844682481554,
4
- "train_runtime": 9615.2592,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 6.358,
7
  "train_steps_per_second": 0.05
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6357159084743924,
4
+ "train_runtime": 9601.7268,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 6.367,
7
  "train_steps_per_second": 0.05
8
  }
trainer_state.json CHANGED
@@ -29,870 +29,870 @@
29
  "epoch": 0.02,
30
  "grad_norm": 0.4609375,
31
  "learning_rate": 1.0416666666666667e-06,
32
- "logits/chosen": -2.242556571960449,
33
- "logits/rejected": -2.277317762374878,
34
- "logps/chosen": -51.96327209472656,
35
- "logps/rejected": -64.98894500732422,
36
  "loss": 0.6929,
37
  "pred_label": 0.0,
38
- "rewards/accuracies": 0.2361111044883728,
39
- "rewards/chosen": 0.002160965697839856,
40
- "rewards/margins": 0.0009470728691667318,
41
- "rewards/rejected": 0.0012138929450884461,
42
  "step": 10,
43
  "use_label": 0.0
44
  },
45
  {
46
  "epoch": 0.04,
47
- "grad_norm": 0.396484375,
48
  "learning_rate": 2.0833333333333334e-06,
49
- "logits/chosen": -2.252474784851074,
50
- "logits/rejected": -2.256141185760498,
51
- "logps/chosen": -62.50165557861328,
52
- "logps/rejected": -72.6328125,
53
  "loss": 0.6919,
54
  "pred_label": 0.0,
55
- "rewards/accuracies": 0.28125,
56
- "rewards/chosen": 0.01592240110039711,
57
- "rewards/margins": 0.001004441175609827,
58
- "rewards/rejected": 0.014917959459125996,
59
  "step": 20,
60
  "use_label": 0.0
61
  },
62
  {
63
  "epoch": 0.06,
64
- "grad_norm": 0.51171875,
65
  "learning_rate": 3.125e-06,
66
- "logits/chosen": -2.342515468597412,
67
- "logits/rejected": -2.3552591800689697,
68
- "logps/chosen": -79.15455627441406,
69
- "logps/rejected": -98.8229751586914,
70
  "loss": 0.6898,
71
  "pred_label": 0.0,
72
  "rewards/accuracies": 0.2874999940395355,
73
- "rewards/chosen": 0.030873581767082214,
74
- "rewards/margins": 0.002844910603016615,
75
- "rewards/rejected": 0.02802867256104946,
76
  "step": 30,
77
  "use_label": 0.0
78
  },
79
  {
80
  "epoch": 0.08,
81
- "grad_norm": 0.51953125,
82
  "learning_rate": 4.166666666666667e-06,
83
- "logits/chosen": -2.323695421218872,
84
- "logits/rejected": -2.3019304275512695,
85
- "logps/chosen": -82.8508071899414,
86
- "logps/rejected": -82.39540100097656,
87
  "loss": 0.6866,
88
  "pred_label": 0.0,
89
  "rewards/accuracies": 0.2874999940395355,
90
- "rewards/chosen": 0.033413294702768326,
91
- "rewards/margins": 0.011912978254258633,
92
- "rewards/rejected": 0.021500317379832268,
93
  "step": 40,
94
  "use_label": 0.0
95
  },
96
  {
97
  "epoch": 0.1,
98
- "grad_norm": 0.6640625,
99
  "learning_rate": 4.999731868769027e-06,
100
- "logits/chosen": -2.2408015727996826,
101
- "logits/rejected": -2.2638282775878906,
102
- "logps/chosen": -67.89698028564453,
103
- "logps/rejected": -81.84117126464844,
104
  "loss": 0.6805,
105
  "pred_label": 0.0,
106
  "rewards/accuracies": 0.32499998807907104,
107
- "rewards/chosen": 0.009338948875665665,
108
- "rewards/margins": 0.030354563146829605,
109
- "rewards/rejected": -0.02101561427116394,
110
  "step": 50,
111
  "use_label": 0.0
112
  },
113
  {
114
  "epoch": 0.13,
115
- "grad_norm": 1.53125,
116
  "learning_rate": 4.9903533134293035e-06,
117
- "logits/chosen": -2.2194154262542725,
118
- "logits/rejected": -2.1603574752807617,
119
- "logps/chosen": -62.444313049316406,
120
- "logps/rejected": -72.18606567382812,
121
- "loss": 0.6753,
122
  "pred_label": 0.0,
123
- "rewards/accuracies": 0.30000001192092896,
124
- "rewards/chosen": -0.027180707082152367,
125
- "rewards/margins": 0.044989973306655884,
126
- "rewards/rejected": -0.072170689702034,
127
  "step": 60,
128
  "use_label": 0.0
129
  },
130
  {
131
  "epoch": 0.15,
132
- "grad_norm": 1.84375,
133
  "learning_rate": 4.967625656594782e-06,
134
- "logits/chosen": -2.1111249923706055,
135
- "logits/rejected": -2.109537124633789,
136
- "logps/chosen": -62.041603088378906,
137
- "logps/rejected": -75.64030456542969,
138
- "loss": 0.666,
139
  "pred_label": 0.0,
140
  "rewards/accuracies": 0.25,
141
- "rewards/chosen": -0.06330498307943344,
142
- "rewards/margins": 0.03508424013853073,
143
- "rewards/rejected": -0.09838922321796417,
144
  "step": 70,
145
  "use_label": 0.0
146
  },
147
  {
148
  "epoch": 0.17,
149
- "grad_norm": 1.03125,
150
  "learning_rate": 4.93167072587771e-06,
151
- "logits/chosen": -2.21980881690979,
152
- "logits/rejected": -2.1616053581237793,
153
- "logps/chosen": -60.844932556152344,
154
- "logps/rejected": -74.95368957519531,
155
- "loss": 0.66,
156
- "pred_label": 0.0,
157
- "rewards/accuracies": 0.26249998807907104,
158
- "rewards/chosen": -0.12314031273126602,
159
- "rewards/margins": 0.0946219339966774,
160
- "rewards/rejected": -0.21776223182678223,
161
  "step": 80,
162
  "use_label": 0.0
163
  },
164
  {
165
  "epoch": 0.19,
166
- "grad_norm": 1.5390625,
167
  "learning_rate": 4.882681251368549e-06,
168
- "logits/chosen": -2.109405279159546,
169
- "logits/rejected": -2.1181578636169434,
170
- "logps/chosen": -77.24811553955078,
171
- "logps/rejected": -95.32093811035156,
172
- "loss": 0.6621,
173
  "pred_label": 0.0,
174
- "rewards/accuracies": 0.3062500059604645,
175
- "rewards/chosen": -0.1886606067419052,
176
- "rewards/margins": 0.07690713554620743,
177
- "rewards/rejected": -0.26556771993637085,
178
  "step": 90,
179
  "use_label": 0.0
180
  },
181
  {
182
  "epoch": 0.21,
183
- "grad_norm": 1.1640625,
184
  "learning_rate": 4.8209198325401815e-06,
185
- "logits/chosen": -2.1972146034240723,
186
- "logits/rejected": -2.169661283493042,
187
- "logps/chosen": -92.16123962402344,
188
- "logps/rejected": -84.31734466552734,
189
- "loss": 0.6553,
190
  "pred_label": 0.0,
191
- "rewards/accuracies": 0.3499999940395355,
192
- "rewards/chosen": -0.12131345272064209,
193
- "rewards/margins": 0.08319222182035446,
194
- "rewards/rejected": -0.20450565218925476,
195
  "step": 100,
196
  "use_label": 0.0
197
  },
198
  {
199
  "epoch": 0.21,
200
- "eval_logits/chosen": -2.0832693576812744,
201
- "eval_logits/rejected": -2.0725808143615723,
202
- "eval_logps/chosen": -76.57865905761719,
203
- "eval_logps/rejected": -104.04773712158203,
204
- "eval_loss": 0.6557236313819885,
205
  "eval_pred_label": 0.0,
206
- "eval_rewards/accuracies": 0.36328125,
207
- "eval_rewards/chosen": -0.12666408717632294,
208
- "eval_rewards/margins": 0.14188387989997864,
209
- "eval_rewards/rejected": -0.26854798197746277,
210
- "eval_runtime": 125.5075,
211
- "eval_samples_per_second": 15.935,
212
  "eval_steps_per_second": 0.255,
213
  "eval_use_label": 0.0,
214
  "step": 100
215
  },
216
  {
217
  "epoch": 0.23,
218
- "grad_norm": 1.1484375,
219
  "learning_rate": 4.746717530629565e-06,
220
- "logits/chosen": -2.125093460083008,
221
- "logits/rejected": -2.108320713043213,
222
- "logps/chosen": -86.47650146484375,
223
- "logps/rejected": -108.77266693115234,
224
- "loss": 0.6536,
225
  "pred_label": 0.0,
226
- "rewards/accuracies": 0.375,
227
- "rewards/chosen": -0.1537572741508484,
228
- "rewards/margins": 0.14806225895881653,
229
- "rewards/rejected": -0.3018195331096649,
230
  "step": 110,
231
  "use_label": 0.0
232
  },
233
  {
234
  "epoch": 0.25,
235
- "grad_norm": 1.5390625,
236
  "learning_rate": 4.660472094042121e-06,
237
- "logits/chosen": -1.9497900009155273,
238
- "logits/rejected": -1.8884683847427368,
239
- "logps/chosen": -95.01170349121094,
240
- "logps/rejected": -114.40583801269531,
241
- "loss": 0.652,
242
  "pred_label": 0.0,
243
  "rewards/accuracies": 0.36250001192092896,
244
- "rewards/chosen": -0.2506612241268158,
245
- "rewards/margins": 0.16420678794384003,
246
- "rewards/rejected": -0.414868026971817,
247
  "step": 120,
248
  "use_label": 0.0
249
  },
250
  {
251
  "epoch": 0.27,
252
- "grad_norm": 1.9296875,
253
  "learning_rate": 4.5626458262912745e-06,
254
- "logits/chosen": -1.7961517572402954,
255
- "logits/rejected": -1.7706302404403687,
256
- "logps/chosen": -90.99502563476562,
257
- "logps/rejected": -112.71142578125,
258
- "loss": 0.654,
259
  "pred_label": 0.0,
260
- "rewards/accuracies": 0.34375,
261
- "rewards/chosen": -0.2521664500236511,
262
- "rewards/margins": 0.1464831829071045,
263
- "rewards/rejected": -0.3986496329307556,
264
  "step": 130,
265
  "use_label": 0.0
266
  },
267
  {
268
  "epoch": 0.29,
269
- "grad_norm": 1.9921875,
270
  "learning_rate": 4.453763107901676e-06,
271
- "logits/chosen": -1.7561969757080078,
272
- "logits/rejected": -1.796431541442871,
273
- "logps/chosen": -96.94844818115234,
274
- "logps/rejected": -107.52276611328125,
275
- "loss": 0.6488,
276
  "pred_label": 0.0,
277
- "rewards/accuracies": 0.26875001192092896,
278
- "rewards/chosen": -0.1620088815689087,
279
- "rewards/margins": 0.12216176092624664,
280
- "rewards/rejected": -0.28417062759399414,
281
  "step": 140,
282
  "use_label": 0.0
283
  },
284
  {
285
  "epoch": 0.31,
286
- "grad_norm": 1.7578125,
287
  "learning_rate": 4.33440758555951e-06,
288
- "logits/chosen": -1.7516326904296875,
289
- "logits/rejected": -1.7187411785125732,
290
- "logps/chosen": -78.70259857177734,
291
- "logps/rejected": -104.34063720703125,
292
- "loss": 0.6451,
293
  "pred_label": 0.0,
294
- "rewards/accuracies": 0.32499998807907104,
295
- "rewards/chosen": -0.13555890321731567,
296
- "rewards/margins": 0.22945857048034668,
297
- "rewards/rejected": -0.36501747369766235,
298
  "step": 150,
299
  "use_label": 0.0
300
  },
301
  {
302
  "epoch": 0.33,
303
- "grad_norm": 2.640625,
304
  "learning_rate": 4.205219043576955e-06,
305
- "logits/chosen": -1.481575608253479,
306
- "logits/rejected": -1.468014121055603,
307
- "logps/chosen": -100.68672180175781,
308
- "logps/rejected": -127.04164123535156,
309
- "loss": 0.6442,
310
  "pred_label": 0.0,
311
- "rewards/accuracies": 0.29374998807907104,
312
- "rewards/chosen": -0.36356669664382935,
313
- "rewards/margins": 0.1327240914106369,
314
- "rewards/rejected": -0.49629077315330505,
315
  "step": 160,
316
  "use_label": 0.0
317
  },
318
  {
319
  "epoch": 0.36,
320
- "grad_norm": 2.390625,
321
  "learning_rate": 4.066889974440757e-06,
322
- "logits/chosen": -0.9005377888679504,
323
- "logits/rejected": -0.8864371180534363,
324
- "logps/chosen": -85.81999206542969,
325
- "logps/rejected": -110.4801254272461,
326
- "loss": 0.6339,
327
  "pred_label": 0.0,
328
- "rewards/accuracies": 0.30000001192092896,
329
- "rewards/chosen": -0.3031192421913147,
330
- "rewards/margins": 0.1594724804162979,
331
- "rewards/rejected": -0.4625917375087738,
332
  "step": 170,
333
  "use_label": 0.0
334
  },
335
  {
336
  "epoch": 0.38,
337
- "grad_norm": 2.78125,
338
  "learning_rate": 3.92016186682789e-06,
339
- "logits/chosen": -0.591436505317688,
340
- "logits/rejected": -0.5489451885223389,
341
- "logps/chosen": -103.7041015625,
342
- "logps/rejected": -123.32816314697266,
343
- "loss": 0.6554,
344
  "pred_label": 0.0,
345
- "rewards/accuracies": 0.3375000059604645,
346
- "rewards/chosen": -0.40916457772254944,
347
- "rewards/margins": 0.2612735629081726,
348
- "rewards/rejected": -0.6704381108283997,
349
  "step": 180,
350
  "use_label": 0.0
351
  },
352
  {
353
  "epoch": 0.4,
354
- "grad_norm": 2.09375,
355
  "learning_rate": 3.7658212309857576e-06,
356
- "logits/chosen": -0.801749587059021,
357
- "logits/rejected": -0.588916003704071,
358
- "logps/chosen": -96.86283874511719,
359
- "logps/rejected": -123.17811584472656,
360
- "loss": 0.6508,
361
  "pred_label": 0.0,
362
- "rewards/accuracies": 0.3062500059604645,
363
- "rewards/chosen": -0.37751203775405884,
364
- "rewards/margins": 0.21026258170604706,
365
- "rewards/rejected": -0.5877746343612671,
366
  "step": 190,
367
  "use_label": 0.0
368
  },
369
  {
370
  "epoch": 0.42,
371
- "grad_norm": 1.59375,
372
  "learning_rate": 3.604695382782159e-06,
373
- "logits/chosen": -1.114527940750122,
374
- "logits/rejected": -1.0130901336669922,
375
- "logps/chosen": -111.54571533203125,
376
- "logps/rejected": -115.97926330566406,
377
- "loss": 0.6446,
378
  "pred_label": 0.0,
379
- "rewards/accuracies": 0.3187499940395355,
380
- "rewards/chosen": -0.2986941933631897,
381
- "rewards/margins": 0.1296522319316864,
382
- "rewards/rejected": -0.4283464550971985,
383
  "step": 200,
384
  "use_label": 0.0
385
  },
386
  {
387
  "epoch": 0.42,
388
- "eval_logits/chosen": -0.7123901844024658,
389
- "eval_logits/rejected": -0.6864092350006104,
390
- "eval_logps/chosen": -92.6377182006836,
391
- "eval_logps/rejected": -130.9503173828125,
392
- "eval_loss": 0.6342783570289612,
393
  "eval_pred_label": 0.0,
394
- "eval_rewards/accuracies": 0.3828125,
395
- "eval_rewards/chosen": -0.28725457191467285,
396
- "eval_rewards/margins": 0.250319242477417,
397
- "eval_rewards/rejected": -0.5375738143920898,
398
- "eval_runtime": 125.6586,
399
- "eval_samples_per_second": 15.916,
400
  "eval_steps_per_second": 0.255,
401
  "eval_use_label": 0.0,
402
  "step": 200
403
  },
404
  {
405
  "epoch": 0.44,
406
- "grad_norm": 2.140625,
407
  "learning_rate": 3.437648009023905e-06,
408
- "logits/chosen": -0.6364002227783203,
409
- "logits/rejected": -0.629191517829895,
410
- "logps/chosen": -79.12034606933594,
411
- "logps/rejected": -109.35395812988281,
412
- "loss": 0.6319,
413
  "pred_label": 0.0,
414
- "rewards/accuracies": 0.35624998807907104,
415
- "rewards/chosen": -0.23145589232444763,
416
- "rewards/margins": 0.2322908192873001,
417
- "rewards/rejected": -0.46374672651290894,
418
  "step": 210,
419
  "use_label": 0.0
420
  },
421
  {
422
  "epoch": 0.46,
423
- "grad_norm": 2.453125,
424
  "learning_rate": 3.265574537815398e-06,
425
- "logits/chosen": -0.24914255738258362,
426
- "logits/rejected": -0.12895795702934265,
427
- "logps/chosen": -123.09925842285156,
428
- "logps/rejected": -127.96968078613281,
429
- "loss": 0.633,
430
  "pred_label": 0.0,
431
- "rewards/accuracies": 0.3187499940395355,
432
- "rewards/chosen": -0.43470579385757446,
433
- "rewards/margins": 0.1813107430934906,
434
- "rewards/rejected": -0.6160165071487427,
435
  "step": 220,
436
  "use_label": 0.0
437
  },
438
  {
439
  "epoch": 0.48,
440
- "grad_norm": 2.734375,
441
  "learning_rate": 3.089397338773569e-06,
442
- "logits/chosen": 0.08423249423503876,
443
- "logits/rejected": 0.1725344955921173,
444
- "logps/chosen": -98.91605377197266,
445
- "logps/rejected": -125.9875259399414,
446
- "loss": 0.6278,
447
  "pred_label": 0.0,
448
- "rewards/accuracies": 0.33125001192092896,
449
- "rewards/chosen": -0.3448147773742676,
450
- "rewards/margins": 0.287472665309906,
451
- "rewards/rejected": -0.6322874426841736,
452
  "step": 230,
453
  "use_label": 0.0
454
  },
455
  {
456
  "epoch": 0.5,
457
- "grad_norm": 2.015625,
458
  "learning_rate": 2.9100607788275547e-06,
459
- "logits/chosen": 0.48232460021972656,
460
- "logits/rejected": 0.39376580715179443,
461
- "logps/chosen": -108.98759460449219,
462
- "logps/rejected": -142.29344177246094,
463
- "loss": 0.6294,
464
  "pred_label": 0.0,
465
- "rewards/accuracies": 0.3812499940395355,
466
- "rewards/chosen": -0.39955058693885803,
467
- "rewards/margins": 0.28114694356918335,
468
- "rewards/rejected": -0.680697500705719,
469
  "step": 240,
470
  "use_label": 0.0
471
  },
472
  {
473
  "epoch": 0.52,
474
- "grad_norm": 2.25,
475
  "learning_rate": 2.72852616010567e-06,
476
- "logits/chosen": 0.35806649923324585,
477
- "logits/rejected": 0.41671887040138245,
478
- "logps/chosen": -126.65348052978516,
479
- "logps/rejected": -151.3179168701172,
480
- "loss": 0.6419,
481
  "pred_label": 0.0,
482
- "rewards/accuracies": 0.36250001192092896,
483
- "rewards/chosen": -0.5325437784194946,
484
- "rewards/margins": 0.28831106424331665,
485
- "rewards/rejected": -0.8208548426628113,
486
  "step": 250,
487
  "use_label": 0.0
488
  },
489
  {
490
  "epoch": 0.54,
491
- "grad_norm": 2.46875,
492
  "learning_rate": 2.5457665670441937e-06,
493
- "logits/chosen": 0.4644729197025299,
494
- "logits/rejected": 0.45051756501197815,
495
- "logps/chosen": -110.62007904052734,
496
- "logps/rejected": -142.76722717285156,
497
- "loss": 0.6232,
498
  "pred_label": 0.0,
499
- "rewards/accuracies": 0.32499998807907104,
500
- "rewards/chosen": -0.4451447129249573,
501
- "rewards/margins": 0.2380482256412506,
502
- "rewards/rejected": -0.6831929087638855,
503
  "step": 260,
504
  "use_label": 0.0
505
  },
506
  {
507
  "epoch": 0.57,
508
- "grad_norm": 2.4375,
509
  "learning_rate": 2.3627616503391813e-06,
510
- "logits/chosen": 0.6336380839347839,
511
- "logits/rejected": 0.5556719303131104,
512
- "logps/chosen": -116.7416000366211,
513
- "logps/rejected": -135.33096313476562,
514
- "loss": 0.6174,
515
  "pred_label": 0.0,
516
- "rewards/accuracies": 0.3375000059604645,
517
- "rewards/chosen": -0.43825817108154297,
518
- "rewards/margins": 0.22129836678504944,
519
- "rewards/rejected": -0.65955650806427,
520
  "step": 270,
521
  "use_label": 0.0
522
  },
523
  {
524
  "epoch": 0.59,
525
- "grad_norm": 3.0625,
526
  "learning_rate": 2.1804923757009885e-06,
527
- "logits/chosen": 0.6383472681045532,
528
- "logits/rejected": 0.7697634100914001,
529
- "logps/chosen": -106.45858001708984,
530
- "logps/rejected": -125.5028305053711,
531
- "loss": 0.6353,
532
  "pred_label": 0.0,
533
  "rewards/accuracies": 0.30000001192092896,
534
- "rewards/chosen": -0.4095306992530823,
535
- "rewards/margins": 0.21630148589611053,
536
- "rewards/rejected": -0.625832200050354,
537
  "step": 280,
538
  "use_label": 0.0
539
  },
540
  {
541
  "epoch": 0.61,
542
- "grad_norm": 3.328125,
543
  "learning_rate": 1.9999357655598894e-06,
544
- "logits/chosen": 0.1407470554113388,
545
- "logits/rejected": 0.12877413630485535,
546
- "logps/chosen": -108.0340805053711,
547
- "logps/rejected": -136.49562072753906,
548
- "loss": 0.6265,
549
  "pred_label": 0.0,
550
  "rewards/accuracies": 0.30000001192092896,
551
- "rewards/chosen": -0.41485634446144104,
552
- "rewards/margins": 0.18648667633533478,
553
- "rewards/rejected": -0.601343035697937,
554
  "step": 290,
555
  "use_label": 0.0
556
  },
557
  {
558
  "epoch": 0.63,
559
- "grad_norm": 3.03125,
560
  "learning_rate": 1.8220596619089576e-06,
561
- "logits/chosen": 0.4002162516117096,
562
- "logits/rejected": 0.25351682305336,
563
- "logps/chosen": -127.95108795166016,
564
- "logps/rejected": -172.98793029785156,
565
- "loss": 0.6273,
566
- "pred_label": 0.0,
567
- "rewards/accuracies": 0.4124999940395355,
568
- "rewards/chosen": -0.5035675168037415,
569
- "rewards/margins": 0.2851078510284424,
570
- "rewards/rejected": -0.7886753678321838,
571
  "step": 300,
572
  "use_label": 0.0
573
  },
574
  {
575
  "epoch": 0.63,
576
- "eval_logits/chosen": 0.6280341148376465,
577
- "eval_logits/rejected": 0.6725929379463196,
578
- "eval_logps/chosen": -110.14692687988281,
579
- "eval_logps/rejected": -157.1332244873047,
580
- "eval_loss": 0.620426595211029,
581
  "eval_pred_label": 0.0,
582
- "eval_rewards/accuracies": 0.3671875,
583
- "eval_rewards/chosen": -0.46234679222106934,
584
- "eval_rewards/margins": 0.33705610036849976,
585
- "eval_rewards/rejected": -0.7994028329849243,
586
- "eval_runtime": 125.7299,
587
- "eval_samples_per_second": 15.907,
588
  "eval_steps_per_second": 0.255,
589
  "eval_use_label": 0.0,
590
  "step": 300
591
  },
592
  {
593
  "epoch": 0.65,
594
- "grad_norm": 2.390625,
595
  "learning_rate": 1.647817538357072e-06,
596
- "logits/chosen": 0.33872538805007935,
597
- "logits/rejected": 0.3415250778198242,
598
- "logps/chosen": -95.08795166015625,
599
- "logps/rejected": -142.95713806152344,
600
- "loss": 0.6014,
601
  "pred_label": 0.0,
602
- "rewards/accuracies": 0.375,
603
- "rewards/chosen": -0.39491352438926697,
604
- "rewards/margins": 0.35215410590171814,
605
- "rewards/rejected": -0.7470676302909851,
606
  "step": 310,
607
  "use_label": 0.0
608
  },
609
  {
610
  "epoch": 0.67,
611
- "grad_norm": 2.546875,
612
  "learning_rate": 1.4781433892011132e-06,
613
- "logits/chosen": 0.2642754018306732,
614
- "logits/rejected": 0.4063233435153961,
615
- "logps/chosen": -131.07791137695312,
616
- "logps/rejected": -164.12667846679688,
617
- "loss": 0.6133,
618
  "pred_label": 0.0,
619
- "rewards/accuracies": 0.38749998807907104,
620
- "rewards/chosen": -0.580074667930603,
621
- "rewards/margins": 0.38923436403274536,
622
- "rewards/rejected": -0.9693089723587036,
623
  "step": 320,
624
  "use_label": 0.0
625
  },
626
  {
627
  "epoch": 0.69,
628
- "grad_norm": 3.15625,
629
  "learning_rate": 1.3139467229135999e-06,
630
- "logits/chosen": 0.5224499106407166,
631
- "logits/rejected": 0.5213581919670105,
632
- "logps/chosen": -130.00186157226562,
633
- "logps/rejected": -156.6516876220703,
634
- "loss": 0.6387,
635
  "pred_label": 0.0,
636
- "rewards/accuracies": 0.3375000059604645,
637
- "rewards/chosen": -0.5989372134208679,
638
- "rewards/margins": 0.2814994752407074,
639
- "rewards/rejected": -0.8804367184638977,
640
  "step": 330,
641
  "use_label": 0.0
642
  },
643
  {
644
  "epoch": 0.71,
645
- "grad_norm": 2.28125,
646
  "learning_rate": 1.1561076868822756e-06,
647
- "logits/chosen": 0.1671726554632187,
648
- "logits/rejected": 0.0974355936050415,
649
- "logps/chosen": -140.3222198486328,
650
- "logps/rejected": -155.46217346191406,
651
- "loss": 0.6252,
652
  "pred_label": 0.0,
653
- "rewards/accuracies": 0.3375000059604645,
654
- "rewards/chosen": -0.5558302998542786,
655
- "rewards/margins": 0.23368898034095764,
656
- "rewards/rejected": -0.7895193099975586,
657
  "step": 340,
658
  "use_label": 0.0
659
  },
660
  {
661
  "epoch": 0.73,
662
- "grad_norm": 3.328125,
663
  "learning_rate": 1.0054723495346484e-06,
664
- "logits/chosen": 0.081739641726017,
665
- "logits/rejected": 0.08175826817750931,
666
- "logps/chosen": -150.41506958007812,
667
- "logps/rejected": -178.51565551757812,
668
- "loss": 0.6231,
669
  "pred_label": 0.0,
670
- "rewards/accuracies": 0.375,
671
- "rewards/chosen": -0.6099845170974731,
672
- "rewards/margins": 0.322490930557251,
673
- "rewards/rejected": -0.9324753880500793,
674
  "step": 350,
675
  "use_label": 0.0
676
  },
677
  {
678
  "epoch": 0.75,
679
- "grad_norm": 1.875,
680
  "learning_rate": 8.628481651367876e-07,
681
- "logits/chosen": 0.12279005348682404,
682
- "logits/rejected": 0.20824797451496124,
683
- "logps/chosen": -110.51042175292969,
684
- "logps/rejected": -153.92698669433594,
685
- "loss": 0.6186,
686
  "pred_label": 0.0,
687
- "rewards/accuracies": 0.3687500059604645,
688
- "rewards/chosen": -0.46872806549072266,
689
- "rewards/margins": 0.3482593894004822,
690
- "rewards/rejected": -0.8169875144958496,
691
  "step": 360,
692
  "use_label": 0.0
693
  },
694
  {
695
  "epoch": 0.77,
696
- "grad_norm": 2.15625,
697
  "learning_rate": 7.289996455765749e-07,
698
- "logits/chosen": 0.19759848713874817,
699
- "logits/rejected": 0.29472407698631287,
700
- "logps/chosen": -103.1863021850586,
701
- "logps/rejected": -143.578125,
702
- "loss": 0.6166,
703
  "pred_label": 0.0,
704
- "rewards/accuracies": 0.3687500059604645,
705
- "rewards/chosen": -0.37751519680023193,
706
- "rewards/margins": 0.37911203503608704,
707
- "rewards/rejected": -0.7566272020339966,
708
  "step": 370,
709
  "use_label": 0.0
710
  },
711
  {
712
  "epoch": 0.8,
713
- "grad_norm": 1.96875,
714
  "learning_rate": 6.046442623320145e-07,
715
- "logits/chosen": 0.03893072158098221,
716
- "logits/rejected": 0.019468214362859726,
717
- "logps/chosen": -108.17799377441406,
718
- "logps/rejected": -158.08056640625,
719
- "loss": 0.6183,
720
  "pred_label": 0.0,
721
  "rewards/accuracies": 0.3187499940395355,
722
- "rewards/chosen": -0.42342591285705566,
723
- "rewards/margins": 0.2937392592430115,
724
- "rewards/rejected": -0.7171651124954224,
725
  "step": 380,
726
  "use_label": 0.0
727
  },
728
  {
729
  "epoch": 0.82,
730
- "grad_norm": 2.59375,
731
  "learning_rate": 4.904486005914027e-07,
732
- "logits/chosen": 0.33429718017578125,
733
- "logits/rejected": 0.08158789575099945,
734
- "logps/chosen": -151.29055786132812,
735
- "logps/rejected": -180.48861694335938,
736
- "loss": 0.6114,
737
  "pred_label": 0.0,
738
- "rewards/accuracies": 0.39375001192092896,
739
- "rewards/chosen": -0.5847219824790955,
740
- "rewards/margins": 0.3904651999473572,
741
- "rewards/rejected": -0.9751871824264526,
742
  "step": 390,
743
  "use_label": 0.0
744
  },
745
  {
746
  "epoch": 0.84,
747
- "grad_norm": 2.015625,
748
  "learning_rate": 3.8702478614051353e-07,
749
- "logits/chosen": 0.126608207821846,
750
- "logits/rejected": 0.2576550841331482,
751
- "logps/chosen": -109.39167785644531,
752
- "logps/rejected": -134.27053833007812,
753
- "loss": 0.6165,
754
  "pred_label": 0.0,
755
- "rewards/accuracies": 0.38749998807907104,
756
- "rewards/chosen": -0.36900678277015686,
757
- "rewards/margins": 0.3390708863735199,
758
- "rewards/rejected": -0.708077609539032,
759
  "step": 400,
760
  "use_label": 0.0
761
  },
762
  {
763
  "epoch": 0.84,
764
- "eval_logits/chosen": 0.903490424156189,
765
- "eval_logits/rejected": 0.958048939704895,
766
- "eval_logps/chosen": -108.47840881347656,
767
- "eval_logps/rejected": -158.4149169921875,
768
- "eval_loss": 0.6182093620300293,
769
  "eval_pred_label": 0.0,
770
- "eval_rewards/accuracies": 0.3671875,
771
- "eval_rewards/chosen": -0.4456615447998047,
772
- "eval_rewards/margins": 0.3665582537651062,
773
- "eval_rewards/rejected": -0.8122197389602661,
774
- "eval_runtime": 125.7278,
775
- "eval_samples_per_second": 15.907,
776
  "eval_steps_per_second": 0.255,
777
  "eval_use_label": 0.0,
778
  "step": 400
779
  },
780
  {
781
  "epoch": 0.86,
782
- "grad_norm": 2.046875,
783
  "learning_rate": 2.9492720416985004e-07,
784
- "logits/chosen": 0.39335688948631287,
785
- "logits/rejected": 0.41703349351882935,
786
- "logps/chosen": -106.9058837890625,
787
- "logps/rejected": -138.57296752929688,
788
- "loss": 0.6272,
789
  "pred_label": 0.0,
790
- "rewards/accuracies": 0.36250001192092896,
791
- "rewards/chosen": -0.45482879877090454,
792
- "rewards/margins": 0.3337084650993347,
793
- "rewards/rejected": -0.788537323474884,
794
  "step": 410,
795
  "use_label": 0.0
796
  },
797
  {
798
  "epoch": 0.88,
799
- "grad_norm": 2.078125,
800
  "learning_rate": 2.1464952759020857e-07,
801
- "logits/chosen": 0.5264393091201782,
802
- "logits/rejected": 0.4952784478664398,
803
- "logps/chosen": -104.27522277832031,
804
- "logps/rejected": -112.507080078125,
805
- "loss": 0.6235,
806
  "pred_label": 0.0,
807
  "rewards/accuracies": 0.2750000059604645,
808
- "rewards/chosen": -0.4333609640598297,
809
- "rewards/margins": 0.1778794825077057,
810
- "rewards/rejected": -0.6112405061721802,
811
  "step": 420,
812
  "use_label": 0.0
813
  },
814
  {
815
  "epoch": 0.9,
816
- "grad_norm": 1.734375,
817
  "learning_rate": 1.4662207078575685e-07,
818
- "logits/chosen": 0.47332754731178284,
819
- "logits/rejected": 0.4613571763038635,
820
- "logps/chosen": -144.65744018554688,
821
- "logps/rejected": -170.08921813964844,
822
- "loss": 0.5988,
823
  "pred_label": 0.0,
824
  "rewards/accuracies": 0.45625001192092896,
825
- "rewards/chosen": -0.4539059102535248,
826
- "rewards/margins": 0.4534150958061218,
827
- "rewards/rejected": -0.9073210954666138,
828
  "step": 430,
829
  "use_label": 0.0
830
  },
831
  {
832
  "epoch": 0.92,
833
- "grad_norm": 1.9609375,
834
  "learning_rate": 9.120948298936422e-08,
835
- "logits/chosen": 0.48202329874038696,
836
- "logits/rejected": 0.6259401440620422,
837
- "logps/chosen": -114.15118408203125,
838
- "logps/rejected": -161.5361785888672,
839
- "loss": 0.6098,
840
  "pred_label": 0.0,
841
- "rewards/accuracies": 0.36250001192092896,
842
- "rewards/chosen": -0.4724721908569336,
843
- "rewards/margins": 0.39225998520851135,
844
- "rewards/rejected": -0.8647321462631226,
845
  "step": 440,
846
  "use_label": 0.0
847
  },
848
  {
849
  "epoch": 0.94,
850
- "grad_norm": 2.265625,
851
  "learning_rate": 4.870879364444109e-08,
852
- "logits/chosen": 0.8100695610046387,
853
- "logits/rejected": 0.5903851389884949,
854
- "logps/chosen": -126.81998443603516,
855
- "logps/rejected": -174.6106719970703,
856
- "loss": 0.6122,
857
  "pred_label": 0.0,
858
- "rewards/accuracies": 0.36250001192092896,
859
- "rewards/chosen": -0.5456215739250183,
860
- "rewards/margins": 0.3175886273384094,
861
- "rewards/rejected": -0.8632103204727173,
862
  "step": 450,
863
  "use_label": 0.0
864
  },
865
  {
866
  "epoch": 0.96,
867
- "grad_norm": 2.140625,
868
  "learning_rate": 1.93478202307823e-08,
869
- "logits/chosen": 0.7001665830612183,
870
- "logits/rejected": 0.7000536322593689,
871
- "logps/chosen": -80.71357727050781,
872
- "logps/rejected": -126.110595703125,
873
- "loss": 0.6182,
874
  "pred_label": 0.0,
875
- "rewards/accuracies": 0.32499998807907104,
876
- "rewards/chosen": -0.3459371328353882,
877
- "rewards/margins": 0.2817174792289734,
878
- "rewards/rejected": -0.6276546716690063,
879
  "step": 460,
880
  "use_label": 0.0
881
  },
882
  {
883
  "epoch": 0.98,
884
- "grad_norm": 2.78125,
885
  "learning_rate": 3.283947088983663e-09,
886
- "logits/chosen": 0.7130995392799377,
887
- "logits/rejected": 0.5145190954208374,
888
- "logps/chosen": -110.40830993652344,
889
- "logps/rejected": -137.49429321289062,
890
- "loss": 0.6251,
891
  "pred_label": 0.0,
892
  "rewards/accuracies": 0.3125,
893
- "rewards/chosen": -0.43079155683517456,
894
- "rewards/margins": 0.25358152389526367,
895
- "rewards/rejected": -0.6843730211257935,
896
  "step": 470,
897
  "use_label": 0.0
898
  },
@@ -900,9 +900,9 @@
900
  "epoch": 1.0,
901
  "step": 477,
902
  "total_flos": 0.0,
903
- "train_loss": 0.6389844682481554,
904
- "train_runtime": 9615.2592,
905
- "train_samples_per_second": 6.358,
906
  "train_steps_per_second": 0.05
907
  }
908
  ],
 
29
  "epoch": 0.02,
30
  "grad_norm": 0.4609375,
31
  "learning_rate": 1.0416666666666667e-06,
32
+ "logits/chosen": -2.2421462535858154,
33
+ "logits/rejected": -2.2770614624023438,
34
+ "logps/chosen": -51.98179626464844,
35
+ "logps/rejected": -64.9604263305664,
36
  "loss": 0.6929,
37
  "pred_label": 0.0,
38
+ "rewards/accuracies": 0.2222222238779068,
39
+ "rewards/chosen": 0.001975727966055274,
40
+ "rewards/margins": 0.00047667179023846984,
41
+ "rewards/rejected": 0.001499056350439787,
42
  "step": 10,
43
  "use_label": 0.0
44
  },
45
  {
46
  "epoch": 0.04,
47
+ "grad_norm": 0.39453125,
48
  "learning_rate": 2.0833333333333334e-06,
49
+ "logits/chosen": -2.2520272731781006,
50
+ "logits/rejected": -2.255510091781616,
51
+ "logps/chosen": -62.492515563964844,
52
+ "logps/rejected": -72.63607788085938,
53
  "loss": 0.6919,
54
  "pred_label": 0.0,
55
+ "rewards/accuracies": 0.2874999940395355,
56
+ "rewards/chosen": 0.01601376011967659,
57
+ "rewards/margins": 0.0011284304782748222,
58
+ "rewards/rejected": 0.014885328710079193,
59
  "step": 20,
60
  "use_label": 0.0
61
  },
62
  {
63
  "epoch": 0.06,
64
+ "grad_norm": 0.5078125,
65
  "learning_rate": 3.125e-06,
66
+ "logits/chosen": -2.3422012329101562,
67
+ "logits/rejected": -2.3548905849456787,
68
+ "logps/chosen": -79.14694213867188,
69
+ "logps/rejected": -98.82722473144531,
70
  "loss": 0.6898,
71
  "pred_label": 0.0,
72
  "rewards/accuracies": 0.2874999940395355,
73
+ "rewards/chosen": 0.030949687585234642,
74
+ "rewards/margins": 0.0029636542312800884,
75
+ "rewards/rejected": 0.027986034750938416,
76
  "step": 30,
77
  "use_label": 0.0
78
  },
79
  {
80
  "epoch": 0.08,
81
+ "grad_norm": 0.515625,
82
  "learning_rate": 4.166666666666667e-06,
83
+ "logits/chosen": -2.322833776473999,
84
+ "logits/rejected": -2.3010501861572266,
85
+ "logps/chosen": -82.85880279541016,
86
+ "logps/rejected": -82.40392303466797,
87
  "loss": 0.6866,
88
  "pred_label": 0.0,
89
  "rewards/accuracies": 0.2874999940395355,
90
+ "rewards/chosen": 0.033333443105220795,
91
+ "rewards/margins": 0.011918319389224052,
92
+ "rewards/rejected": 0.021415119990706444,
93
  "step": 40,
94
  "use_label": 0.0
95
  },
96
  {
97
  "epoch": 0.1,
98
+ "grad_norm": 0.67578125,
99
  "learning_rate": 4.999731868769027e-06,
100
+ "logits/chosen": -2.241189956665039,
101
+ "logits/rejected": -2.263849973678589,
102
+ "logps/chosen": -67.93062591552734,
103
+ "logps/rejected": -81.85546875,
104
  "loss": 0.6805,
105
  "pred_label": 0.0,
106
  "rewards/accuracies": 0.32499998807907104,
107
+ "rewards/chosen": 0.009002490900456905,
108
+ "rewards/margins": 0.03016103245317936,
109
+ "rewards/rejected": -0.02115854248404503,
110
  "step": 50,
111
  "use_label": 0.0
112
  },
113
  {
114
  "epoch": 0.13,
115
+ "grad_norm": 1.09375,
116
  "learning_rate": 4.9903533134293035e-06,
117
+ "logits/chosen": -2.218756914138794,
118
+ "logits/rejected": -2.1594481468200684,
119
+ "logps/chosen": -62.0407600402832,
120
+ "logps/rejected": -71.9369888305664,
121
+ "loss": 0.6748,
122
  "pred_label": 0.0,
123
+ "rewards/accuracies": 0.3062500059604645,
124
+ "rewards/chosen": -0.0231451578438282,
125
+ "rewards/margins": 0.04653460532426834,
126
+ "rewards/rejected": -0.06967976689338684,
127
  "step": 60,
128
  "use_label": 0.0
129
  },
130
  {
131
  "epoch": 0.15,
132
+ "grad_norm": 0.8984375,
133
  "learning_rate": 4.967625656594782e-06,
134
+ "logits/chosen": -2.08909273147583,
135
+ "logits/rejected": -2.088801383972168,
136
+ "logps/chosen": -68.09326171875,
137
+ "logps/rejected": -81.9454116821289,
138
+ "loss": 0.6684,
139
  "pred_label": 0.0,
140
  "rewards/accuracies": 0.25,
141
+ "rewards/chosen": -0.12382155656814575,
142
+ "rewards/margins": 0.03761869668960571,
143
+ "rewards/rejected": -0.16144026815891266,
144
  "step": 70,
145
  "use_label": 0.0
146
  },
147
  {
148
  "epoch": 0.17,
149
+ "grad_norm": 1.15625,
150
  "learning_rate": 4.93167072587771e-06,
151
+ "logits/chosen": -2.20400071144104,
152
+ "logits/rejected": -2.1452622413635254,
153
+ "logps/chosen": -55.867881774902344,
154
+ "logps/rejected": -70.91771697998047,
155
+ "loss": 0.6588,
156
+ "pred_label": 0.0,
157
+ "rewards/accuracies": 0.26875001192092896,
158
+ "rewards/chosen": -0.0733698159456253,
159
+ "rewards/margins": 0.10403277724981308,
160
+ "rewards/rejected": -0.17740261554718018,
161
  "step": 80,
162
  "use_label": 0.0
163
  },
164
  {
165
  "epoch": 0.19,
166
+ "grad_norm": 1.0546875,
167
  "learning_rate": 4.882681251368549e-06,
168
+ "logits/chosen": -1.991231918334961,
169
+ "logits/rejected": -1.9964717626571655,
170
+ "logps/chosen": -72.28443908691406,
171
+ "logps/rejected": -90.79218292236328,
172
+ "loss": 0.6587,
173
  "pred_label": 0.0,
174
+ "rewards/accuracies": 0.30000001192092896,
175
+ "rewards/chosen": -0.13902384042739868,
176
+ "rewards/margins": 0.08125626295804977,
177
+ "rewards/rejected": -0.22028008103370667,
178
  "step": 90,
179
  "use_label": 0.0
180
  },
181
  {
182
  "epoch": 0.21,
183
+ "grad_norm": 2.359375,
184
  "learning_rate": 4.8209198325401815e-06,
185
+ "logits/chosen": -1.9231764078140259,
186
+ "logits/rejected": -1.9043807983398438,
187
+ "logps/chosen": -103.5636978149414,
188
+ "logps/rejected": -96.08602142333984,
189
+ "loss": 0.6551,
190
  "pred_label": 0.0,
191
+ "rewards/accuracies": 0.35624998807907104,
192
+ "rewards/chosen": -0.2353379726409912,
193
+ "rewards/margins": 0.08685441315174103,
194
+ "rewards/rejected": -0.32219237089157104,
195
  "step": 100,
196
  "use_label": 0.0
197
  },
198
  {
199
  "epoch": 0.21,
200
+ "eval_logits/chosen": -1.762041687965393,
201
+ "eval_logits/rejected": -1.7460479736328125,
202
+ "eval_logps/chosen": -87.55253601074219,
203
+ "eval_logps/rejected": -114.47212219238281,
204
+ "eval_loss": 0.652633547782898,
205
  "eval_pred_label": 0.0,
206
+ "eval_rewards/accuracies": 0.3359375,
207
+ "eval_rewards/chosen": -0.23640292882919312,
208
+ "eval_rewards/margins": 0.136388897895813,
209
+ "eval_rewards/rejected": -0.3727918267250061,
210
+ "eval_runtime": 125.4491,
211
+ "eval_samples_per_second": 15.943,
212
  "eval_steps_per_second": 0.255,
213
  "eval_use_label": 0.0,
214
  "step": 100
215
  },
216
  {
217
  "epoch": 0.23,
218
+ "grad_norm": 1.59375,
219
  "learning_rate": 4.746717530629565e-06,
220
+ "logits/chosen": -1.7847106456756592,
221
+ "logits/rejected": -1.7590484619140625,
222
+ "logps/chosen": -85.73925018310547,
223
+ "logps/rejected": -106.20509338378906,
224
+ "loss": 0.6557,
225
  "pred_label": 0.0,
226
+ "rewards/accuracies": 0.3499999940395355,
227
+ "rewards/chosen": -0.14638465642929077,
228
+ "rewards/margins": 0.12975916266441345,
229
+ "rewards/rejected": -0.2761438190937042,
230
  "step": 110,
231
  "use_label": 0.0
232
  },
233
  {
234
  "epoch": 0.25,
235
+ "grad_norm": 1.828125,
236
  "learning_rate": 4.660472094042121e-06,
237
+ "logits/chosen": -1.1902318000793457,
238
+ "logits/rejected": -1.0542975664138794,
239
+ "logps/chosen": -108.4779052734375,
240
+ "logps/rejected": -127.95109558105469,
241
+ "loss": 0.6493,
242
  "pred_label": 0.0,
243
  "rewards/accuracies": 0.36250001192092896,
244
+ "rewards/chosen": -0.38532325625419617,
245
+ "rewards/margins": 0.1649974286556244,
246
+ "rewards/rejected": -0.5503206849098206,
247
  "step": 120,
248
  "use_label": 0.0
249
  },
250
  {
251
  "epoch": 0.27,
252
+ "grad_norm": 1.9375,
253
  "learning_rate": 4.5626458262912745e-06,
254
+ "logits/chosen": -0.818010687828064,
255
+ "logits/rejected": -0.7847374081611633,
256
+ "logps/chosen": -109.61775207519531,
257
+ "logps/rejected": -133.42086791992188,
258
+ "loss": 0.6524,
259
  "pred_label": 0.0,
260
+ "rewards/accuracies": 0.33125001192092896,
261
+ "rewards/chosen": -0.43839359283447266,
262
+ "rewards/margins": 0.16735044121742249,
263
+ "rewards/rejected": -0.6057440638542175,
264
  "step": 130,
265
  "use_label": 0.0
266
  },
267
  {
268
  "epoch": 0.29,
269
+ "grad_norm": 1.71875,
270
  "learning_rate": 4.453763107901676e-06,
271
+ "logits/chosen": -0.7395650148391724,
272
+ "logits/rejected": -0.8444339036941528,
273
+ "logps/chosen": -116.97528076171875,
274
+ "logps/rejected": -130.2399139404297,
275
+ "loss": 0.6381,
276
  "pred_label": 0.0,
277
+ "rewards/accuracies": 0.33125001192092896,
278
+ "rewards/chosen": -0.3622770607471466,
279
+ "rewards/margins": 0.1490650475025177,
280
+ "rewards/rejected": -0.5113420486450195,
281
  "step": 140,
282
  "use_label": 0.0
283
  },
284
  {
285
  "epoch": 0.31,
286
+ "grad_norm": 2.125,
287
  "learning_rate": 4.33440758555951e-06,
288
+ "logits/chosen": -0.6497868299484253,
289
+ "logits/rejected": -0.6378159523010254,
290
+ "logps/chosen": -89.60552978515625,
291
+ "logps/rejected": -115.42192077636719,
292
+ "loss": 0.6379,
293
  "pred_label": 0.0,
294
+ "rewards/accuracies": 0.3187499940395355,
295
+ "rewards/chosen": -0.2445882111787796,
296
+ "rewards/margins": 0.23124215006828308,
297
+ "rewards/rejected": -0.4758303761482239,
298
  "step": 150,
299
  "use_label": 0.0
300
  },
301
  {
302
  "epoch": 0.33,
303
+ "grad_norm": 2.15625,
304
  "learning_rate": 4.205219043576955e-06,
305
+ "logits/chosen": -0.3159053921699524,
306
+ "logits/rejected": -0.33064812421798706,
307
+ "logps/chosen": -99.68696594238281,
308
+ "logps/rejected": -129.45729064941406,
309
+ "loss": 0.6317,
310
  "pred_label": 0.0,
311
+ "rewards/accuracies": 0.2874999940395355,
312
+ "rewards/chosen": -0.35356926918029785,
313
+ "rewards/margins": 0.16687795519828796,
314
+ "rewards/rejected": -0.5204472541809082,
315
  "step": 160,
316
  "use_label": 0.0
317
  },
318
  {
319
  "epoch": 0.36,
320
+ "grad_norm": 2.4375,
321
  "learning_rate": 4.066889974440757e-06,
322
+ "logits/chosen": 0.14531800150871277,
323
+ "logits/rejected": 0.18166163563728333,
324
+ "logps/chosen": -95.45491027832031,
325
+ "logps/rejected": -125.1463623046875,
326
+ "loss": 0.6291,
327
  "pred_label": 0.0,
328
+ "rewards/accuracies": 0.29374998807907104,
329
+ "rewards/chosen": -0.39946848154067993,
330
+ "rewards/margins": 0.20978550612926483,
331
+ "rewards/rejected": -0.609254002571106,
332
  "step": 170,
333
  "use_label": 0.0
334
  },
335
  {
336
  "epoch": 0.38,
337
+ "grad_norm": 2.453125,
338
  "learning_rate": 3.92016186682789e-06,
339
+ "logits/chosen": -0.3282355070114136,
340
+ "logits/rejected": -0.21966704726219177,
341
+ "logps/chosen": -108.00712585449219,
342
+ "logps/rejected": -128.67587280273438,
343
+ "loss": 0.649,
344
  "pred_label": 0.0,
345
+ "rewards/accuracies": 0.35624998807907104,
346
+ "rewards/chosen": -0.4521949887275696,
347
+ "rewards/margins": 0.27172034978866577,
348
+ "rewards/rejected": -0.7239152789115906,
349
  "step": 180,
350
  "use_label": 0.0
351
  },
352
  {
353
  "epoch": 0.4,
354
+ "grad_norm": 1.84375,
355
  "learning_rate": 3.7658212309857576e-06,
356
+ "logits/chosen": -0.889633297920227,
357
+ "logits/rejected": -0.6851574778556824,
358
+ "logps/chosen": -91.25111389160156,
359
+ "logps/rejected": -118.9649887084961,
360
+ "loss": 0.6461,
361
  "pred_label": 0.0,
362
+ "rewards/accuracies": 0.33125001192092896,
363
+ "rewards/chosen": -0.32139474153518677,
364
+ "rewards/margins": 0.22424864768981934,
365
+ "rewards/rejected": -0.5456433892250061,
366
  "step": 190,
367
  "use_label": 0.0
368
  },
369
  {
370
  "epoch": 0.42,
371
+ "grad_norm": 1.9453125,
372
  "learning_rate": 3.604695382782159e-06,
373
+ "logits/chosen": -0.8204952478408813,
374
+ "logits/rejected": -0.7186430096626282,
375
+ "logps/chosen": -112.41142272949219,
376
+ "logps/rejected": -120.7835693359375,
377
+ "loss": 0.6376,
378
  "pred_label": 0.0,
379
+ "rewards/accuracies": 0.3125,
380
+ "rewards/chosen": -0.30735117197036743,
381
+ "rewards/margins": 0.169038325548172,
382
+ "rewards/rejected": -0.47638946771621704,
383
  "step": 200,
384
  "use_label": 0.0
385
  },
386
  {
387
  "epoch": 0.42,
388
+ "eval_logits/chosen": -0.023804781958460808,
389
+ "eval_logits/rejected": 0.04317883029580116,
390
+ "eval_logps/chosen": -97.96138000488281,
391
+ "eval_logps/rejected": -137.9141845703125,
392
+ "eval_loss": 0.6288520693778992,
393
  "eval_pred_label": 0.0,
394
+ "eval_rewards/accuracies": 0.3671875,
395
+ "eval_rewards/chosen": -0.34049129486083984,
396
+ "eval_rewards/margins": 0.26672109961509705,
397
+ "eval_rewards/rejected": -0.6072123646736145,
398
+ "eval_runtime": 125.433,
399
+ "eval_samples_per_second": 15.945,
400
  "eval_steps_per_second": 0.255,
401
  "eval_use_label": 0.0,
402
  "step": 200
403
  },
404
  {
405
  "epoch": 0.44,
406
+ "grad_norm": 2.265625,
407
  "learning_rate": 3.437648009023905e-06,
408
+ "logits/chosen": -0.05805685371160507,
409
+ "logits/rejected": -0.06056814268231392,
410
+ "logps/chosen": -88.78871154785156,
411
+ "logps/rejected": -124.3318862915039,
412
+ "loss": 0.6218,
413
  "pred_label": 0.0,
414
+ "rewards/accuracies": 0.375,
415
+ "rewards/chosen": -0.3281395435333252,
416
+ "rewards/margins": 0.28538644313812256,
417
+ "rewards/rejected": -0.613525927066803,
418
  "step": 210,
419
  "use_label": 0.0
420
  },
421
  {
422
  "epoch": 0.46,
423
+ "grad_norm": 2.21875,
424
  "learning_rate": 3.265574537815398e-06,
425
+ "logits/chosen": -0.1400775909423828,
426
+ "logits/rejected": -0.005620801355689764,
427
+ "logps/chosen": -133.7158660888672,
428
+ "logps/rejected": -136.84619140625,
429
+ "loss": 0.627,
430
  "pred_label": 0.0,
431
+ "rewards/accuracies": 0.30000001192092896,
432
+ "rewards/chosen": -0.5408719778060913,
433
+ "rewards/margins": 0.16390959918498993,
434
+ "rewards/rejected": -0.7047815918922424,
435
  "step": 220,
436
  "use_label": 0.0
437
  },
438
  {
439
  "epoch": 0.48,
440
+ "grad_norm": 1.8515625,
441
  "learning_rate": 3.089397338773569e-06,
442
+ "logits/chosen": 0.16266627609729767,
443
+ "logits/rejected": 0.2626825273036957,
444
+ "logps/chosen": -93.3644027709961,
445
+ "logps/rejected": -119.67996978759766,
446
+ "loss": 0.6261,
447
  "pred_label": 0.0,
448
+ "rewards/accuracies": 0.3187499940395355,
449
+ "rewards/chosen": -0.28929832577705383,
450
+ "rewards/margins": 0.27991363406181335,
451
+ "rewards/rejected": -0.5692119598388672,
452
  "step": 230,
453
  "use_label": 0.0
454
  },
455
  {
456
  "epoch": 0.5,
457
+ "grad_norm": 1.8984375,
458
  "learning_rate": 2.9100607788275547e-06,
459
+ "logits/chosen": 0.854693591594696,
460
+ "logits/rejected": 0.7261193990707397,
461
+ "logps/chosen": -99.00528717041016,
462
+ "logps/rejected": -135.73580932617188,
463
+ "loss": 0.6295,
464
  "pred_label": 0.0,
465
+ "rewards/accuracies": 0.3687500059604645,
466
+ "rewards/chosen": -0.2997274696826935,
467
+ "rewards/margins": 0.3153937757015228,
468
+ "rewards/rejected": -0.6151211857795715,
469
  "step": 240,
470
  "use_label": 0.0
471
  },
472
  {
473
  "epoch": 0.52,
474
+ "grad_norm": 2.03125,
475
  "learning_rate": 2.72852616010567e-06,
476
+ "logits/chosen": 0.6816203594207764,
477
+ "logits/rejected": 0.7033491134643555,
478
+ "logps/chosen": -119.7255859375,
479
+ "logps/rejected": -144.8857421875,
480
+ "loss": 0.6376,
481
  "pred_label": 0.0,
482
+ "rewards/accuracies": 0.3812499940395355,
483
+ "rewards/chosen": -0.4632648825645447,
484
+ "rewards/margins": 0.2932681143283844,
485
+ "rewards/rejected": -0.7565330266952515,
486
  "step": 250,
487
  "use_label": 0.0
488
  },
489
  {
490
  "epoch": 0.54,
491
+ "grad_norm": 1.8984375,
492
  "learning_rate": 2.5457665670441937e-06,
493
+ "logits/chosen": 0.5938165187835693,
494
+ "logits/rejected": 0.5592354536056519,
495
+ "logps/chosen": -110.32804870605469,
496
+ "logps/rejected": -146.76275634765625,
497
+ "loss": 0.6162,
498
  "pred_label": 0.0,
499
+ "rewards/accuracies": 0.34375,
500
+ "rewards/chosen": -0.44222426414489746,
501
+ "rewards/margins": 0.2809238135814667,
502
+ "rewards/rejected": -0.7231480479240417,
503
  "step": 260,
504
  "use_label": 0.0
505
  },
506
  {
507
  "epoch": 0.57,
508
+ "grad_norm": 2.90625,
509
  "learning_rate": 2.3627616503391813e-06,
510
+ "logits/chosen": 0.6390979290008545,
511
+ "logits/rejected": 0.5789315700531006,
512
+ "logps/chosen": -123.83528137207031,
513
+ "logps/rejected": -144.61489868164062,
514
+ "loss": 0.6162,
515
  "pred_label": 0.0,
516
+ "rewards/accuracies": 0.36250001192092896,
517
+ "rewards/chosen": -0.5091949701309204,
518
+ "rewards/margins": 0.24320097267627716,
519
+ "rewards/rejected": -0.7523959279060364,
520
  "step": 270,
521
  "use_label": 0.0
522
  },
523
  {
524
  "epoch": 0.59,
525
+ "grad_norm": 2.34375,
526
  "learning_rate": 2.1804923757009885e-06,
527
+ "logits/chosen": 0.8771865963935852,
528
+ "logits/rejected": 1.0158352851867676,
529
+ "logps/chosen": -118.5296859741211,
530
+ "logps/rejected": -138.31729125976562,
531
+ "loss": 0.6357,
532
  "pred_label": 0.0,
533
  "rewards/accuracies": 0.30000001192092896,
534
+ "rewards/chosen": -0.5302416086196899,
535
+ "rewards/margins": 0.2237352430820465,
536
+ "rewards/rejected": -0.7539768218994141,
537
  "step": 280,
538
  "use_label": 0.0
539
  },
540
  {
541
  "epoch": 0.61,
542
+ "grad_norm": 2.59375,
543
  "learning_rate": 1.9999357655598894e-06,
544
+ "logits/chosen": 0.44083184003829956,
545
+ "logits/rejected": 0.41123947501182556,
546
+ "logps/chosen": -112.27372741699219,
547
+ "logps/rejected": -146.95498657226562,
548
+ "loss": 0.6228,
549
  "pred_label": 0.0,
550
  "rewards/accuracies": 0.30000001192092896,
551
+ "rewards/chosen": -0.4572528004646301,
552
+ "rewards/margins": 0.24868395924568176,
553
+ "rewards/rejected": -0.7059367299079895,
554
  "step": 290,
555
  "use_label": 0.0
556
  },
557
  {
558
  "epoch": 0.63,
559
+ "grad_norm": 2.34375,
560
  "learning_rate": 1.8220596619089576e-06,
561
+ "logits/chosen": 0.6273639798164368,
562
+ "logits/rejected": 0.5140804052352905,
563
+ "logps/chosen": -123.02046966552734,
564
+ "logps/rejected": -168.80987548828125,
565
+ "loss": 0.6196,
566
+ "pred_label": 0.0,
567
+ "rewards/accuracies": 0.40625,
568
+ "rewards/chosen": -0.4542613625526428,
569
+ "rewards/margins": 0.2926333546638489,
570
+ "rewards/rejected": -0.7468947172164917,
571
  "step": 300,
572
  "use_label": 0.0
573
  },
574
  {
575
  "epoch": 0.63,
576
+ "eval_logits/chosen": 1.0944873094558716,
577
+ "eval_logits/rejected": 1.1831356287002563,
578
+ "eval_logps/chosen": -102.62176513671875,
579
+ "eval_logps/rejected": -150.12503051757812,
580
+ "eval_loss": 0.618873655796051,
581
  "eval_pred_label": 0.0,
582
+ "eval_rewards/accuracies": 0.375,
583
+ "eval_rewards/chosen": -0.3870951533317566,
584
+ "eval_rewards/margins": 0.34222573041915894,
585
+ "eval_rewards/rejected": -0.7293209433555603,
586
+ "eval_runtime": 125.4362,
587
+ "eval_samples_per_second": 15.944,
588
  "eval_steps_per_second": 0.255,
589
  "eval_use_label": 0.0,
590
  "step": 300
591
  },
592
  {
593
  "epoch": 0.65,
594
+ "grad_norm": 1.8515625,
595
  "learning_rate": 1.647817538357072e-06,
596
+ "logits/chosen": 0.8131985664367676,
597
+ "logits/rejected": 0.8752232789993286,
598
+ "logps/chosen": -91.52378845214844,
599
+ "logps/rejected": -139.95840454101562,
600
+ "loss": 0.5999,
601
  "pred_label": 0.0,
602
+ "rewards/accuracies": 0.3687500059604645,
603
+ "rewards/chosen": -0.3592718541622162,
604
+ "rewards/margins": 0.3578081727027893,
605
+ "rewards/rejected": -0.7170799970626831,
606
  "step": 310,
607
  "use_label": 0.0
608
  },
609
  {
610
  "epoch": 0.67,
611
+ "grad_norm": 2.40625,
612
  "learning_rate": 1.4781433892011132e-06,
613
+ "logits/chosen": 0.9751952886581421,
614
+ "logits/rejected": 1.1630818843841553,
615
+ "logps/chosen": -135.82566833496094,
616
+ "logps/rejected": -168.11805725097656,
617
+ "loss": 0.6109,
618
  "pred_label": 0.0,
619
+ "rewards/accuracies": 0.3687500059604645,
620
+ "rewards/chosen": -0.6275521516799927,
621
+ "rewards/margins": 0.3816707730293274,
622
+ "rewards/rejected": -1.0092228651046753,
623
  "step": 320,
624
  "use_label": 0.0
625
  },
626
  {
627
  "epoch": 0.69,
628
+ "grad_norm": 1.984375,
629
  "learning_rate": 1.3139467229135999e-06,
630
+ "logits/chosen": 1.3293979167938232,
631
+ "logits/rejected": 1.3260401487350464,
632
+ "logps/chosen": -135.96664428710938,
633
+ "logps/rejected": -166.52359008789062,
634
+ "loss": 0.6295,
635
  "pred_label": 0.0,
636
+ "rewards/accuracies": 0.33125001192092896,
637
+ "rewards/chosen": -0.6585850715637207,
638
+ "rewards/margins": 0.3205706775188446,
639
+ "rewards/rejected": -0.9791557192802429,
640
  "step": 330,
641
  "use_label": 0.0
642
  },
643
  {
644
  "epoch": 0.71,
645
+ "grad_norm": 2.09375,
646
  "learning_rate": 1.1561076868822756e-06,
647
+ "logits/chosen": 0.7383319139480591,
648
+ "logits/rejected": 0.6407849192619324,
649
+ "logps/chosen": -150.60504150390625,
650
+ "logps/rejected": -166.74940490722656,
651
+ "loss": 0.6247,
652
  "pred_label": 0.0,
653
+ "rewards/accuracies": 0.3499999940395355,
654
+ "rewards/chosen": -0.658658504486084,
655
+ "rewards/margins": 0.24373307824134827,
656
+ "rewards/rejected": -0.9023915529251099,
657
  "step": 340,
658
  "use_label": 0.0
659
  },
660
  {
661
  "epoch": 0.73,
662
+ "grad_norm": 2.21875,
663
  "learning_rate": 1.0054723495346484e-06,
664
+ "logits/chosen": 0.6359546184539795,
665
+ "logits/rejected": 0.7167641520500183,
666
+ "logps/chosen": -163.8385772705078,
667
+ "logps/rejected": -195.6297607421875,
668
+ "loss": 0.6138,
669
  "pred_label": 0.0,
670
+ "rewards/accuracies": 0.36250001192092896,
671
+ "rewards/chosen": -0.7442194819450378,
672
+ "rewards/margins": 0.3593973219394684,
673
+ "rewards/rejected": -1.103616714477539,
674
  "step": 350,
675
  "use_label": 0.0
676
  },
677
  {
678
  "epoch": 0.75,
679
+ "grad_norm": 1.859375,
680
  "learning_rate": 8.628481651367876e-07,
681
+ "logits/chosen": 0.7298086881637573,
682
+ "logits/rejected": 0.8517257571220398,
683
+ "logps/chosen": -119.41548156738281,
684
+ "logps/rejected": -165.3460235595703,
685
+ "loss": 0.6137,
686
  "pred_label": 0.0,
687
+ "rewards/accuracies": 0.3812499940395355,
688
+ "rewards/chosen": -0.5577787160873413,
689
+ "rewards/margins": 0.37339919805526733,
690
+ "rewards/rejected": -0.9311779141426086,
691
  "step": 360,
692
  "use_label": 0.0
693
  },
694
  {
695
  "epoch": 0.77,
696
+ "grad_norm": 2.421875,
697
  "learning_rate": 7.289996455765749e-07,
698
+ "logits/chosen": 0.8383787274360657,
699
+ "logits/rejected": 0.9305205345153809,
700
+ "logps/chosen": -111.84449768066406,
701
+ "logps/rejected": -153.93136596679688,
702
+ "loss": 0.6125,
703
  "pred_label": 0.0,
704
+ "rewards/accuracies": 0.3499999940395355,
705
+ "rewards/chosen": -0.46409696340560913,
706
+ "rewards/margins": 0.39606258273124695,
707
+ "rewards/rejected": -0.8601595759391785,
708
  "step": 370,
709
  "use_label": 0.0
710
  },
711
  {
712
  "epoch": 0.8,
713
+ "grad_norm": 1.8984375,
714
  "learning_rate": 6.046442623320145e-07,
715
+ "logits/chosen": 0.5329448580741882,
716
+ "logits/rejected": 0.513522744178772,
717
+ "logps/chosen": -116.62841796875,
718
+ "logps/rejected": -165.17893981933594,
719
+ "loss": 0.6191,
720
  "pred_label": 0.0,
721
  "rewards/accuracies": 0.3187499940395355,
722
+ "rewards/chosen": -0.5079302787780762,
723
+ "rewards/margins": 0.2802185118198395,
724
+ "rewards/rejected": -0.7881487607955933,
725
  "step": 380,
726
  "use_label": 0.0
727
  },
728
  {
729
  "epoch": 0.82,
730
+ "grad_norm": 2.4375,
731
  "learning_rate": 4.904486005914027e-07,
732
+ "logits/chosen": 0.8266662359237671,
733
+ "logits/rejected": 0.5234752893447876,
734
+ "logps/chosen": -159.83407592773438,
735
+ "logps/rejected": -186.96768188476562,
736
+ "loss": 0.6085,
737
  "pred_label": 0.0,
738
+ "rewards/accuracies": 0.38749998807907104,
739
+ "rewards/chosen": -0.6701575517654419,
740
+ "rewards/margins": 0.36982032656669617,
741
+ "rewards/rejected": -1.039977788925171,
742
  "step": 390,
743
  "use_label": 0.0
744
  },
745
  {
746
  "epoch": 0.84,
747
+ "grad_norm": 2.46875,
748
  "learning_rate": 3.8702478614051353e-07,
749
+ "logits/chosen": 0.511390745639801,
750
+ "logits/rejected": 0.6720080971717834,
751
+ "logps/chosen": -116.7987060546875,
752
+ "logps/rejected": -141.3931884765625,
753
+ "loss": 0.6139,
754
  "pred_label": 0.0,
755
+ "rewards/accuracies": 0.3812499940395355,
756
+ "rewards/chosen": -0.4430771768093109,
757
+ "rewards/margins": 0.3362268805503845,
758
+ "rewards/rejected": -0.779304027557373,
759
  "step": 400,
760
  "use_label": 0.0
761
  },
762
  {
763
  "epoch": 0.84,
764
+ "eval_logits/chosen": 1.4532994031906128,
765
+ "eval_logits/rejected": 1.5453113317489624,
766
+ "eval_logps/chosen": -112.56050109863281,
767
+ "eval_logps/rejected": -162.19764709472656,
768
+ "eval_loss": 0.6157013177871704,
769
  "eval_pred_label": 0.0,
770
+ "eval_rewards/accuracies": 0.37109375,
771
+ "eval_rewards/chosen": -0.4864824414253235,
772
+ "eval_rewards/margins": 0.36356455087661743,
773
+ "eval_rewards/rejected": -0.8500469923019409,
774
+ "eval_runtime": 125.4203,
775
+ "eval_samples_per_second": 15.946,
776
  "eval_steps_per_second": 0.255,
777
  "eval_use_label": 0.0,
778
  "step": 400
779
  },
780
  {
781
  "epoch": 0.86,
782
+ "grad_norm": 2.203125,
783
  "learning_rate": 2.9492720416985004e-07,
784
+ "logits/chosen": 0.8359997868537903,
785
+ "logits/rejected": 0.8144146800041199,
786
+ "logps/chosen": -110.30177307128906,
787
+ "logps/rejected": -143.6800079345703,
788
+ "loss": 0.6222,
789
  "pred_label": 0.0,
790
+ "rewards/accuracies": 0.39375001192092896,
791
+ "rewards/chosen": -0.4887877404689789,
792
+ "rewards/margins": 0.3508199453353882,
793
+ "rewards/rejected": -0.8396075963973999,
794
  "step": 410,
795
  "use_label": 0.0
796
  },
797
  {
798
  "epoch": 0.88,
799
+ "grad_norm": 1.984375,
800
  "learning_rate": 2.1464952759020857e-07,
801
+ "logits/chosen": 1.027252435684204,
802
+ "logits/rejected": 0.9827619791030884,
803
+ "logps/chosen": -106.49784851074219,
804
+ "logps/rejected": -116.97566223144531,
805
+ "loss": 0.6216,
806
  "pred_label": 0.0,
807
  "rewards/accuracies": 0.2750000059604645,
808
+ "rewards/chosen": -0.4555872976779938,
809
+ "rewards/margins": 0.20033884048461914,
810
+ "rewards/rejected": -0.6559261083602905,
811
  "step": 420,
812
  "use_label": 0.0
813
  },
814
  {
815
  "epoch": 0.9,
816
+ "grad_norm": 1.96875,
817
  "learning_rate": 1.4662207078575685e-07,
818
+ "logits/chosen": 0.9206047058105469,
819
+ "logits/rejected": 0.8673297166824341,
820
+ "logps/chosen": -151.376220703125,
821
+ "logps/rejected": -178.04725646972656,
822
+ "loss": 0.5986,
823
  "pred_label": 0.0,
824
  "rewards/accuracies": 0.45625001192092896,
825
+ "rewards/chosen": -0.5210937261581421,
826
+ "rewards/margins": 0.46580758690834045,
827
+ "rewards/rejected": -0.9869012832641602,
828
  "step": 430,
829
  "use_label": 0.0
830
  },
831
  {
832
  "epoch": 0.92,
833
+ "grad_norm": 2.125,
834
  "learning_rate": 9.120948298936422e-08,
835
+ "logits/chosen": 0.9004503488540649,
836
+ "logits/rejected": 1.0573413372039795,
837
+ "logps/chosen": -119.21500396728516,
838
+ "logps/rejected": -165.19241333007812,
839
+ "loss": 0.6064,
840
  "pred_label": 0.0,
841
+ "rewards/accuracies": 0.35624998807907104,
842
+ "rewards/chosen": -0.5231102705001831,
843
+ "rewards/margins": 0.37818416953086853,
844
+ "rewards/rejected": -0.9012944102287292,
845
  "step": 440,
846
  "use_label": 0.0
847
  },
848
  {
849
  "epoch": 0.94,
850
+ "grad_norm": 2.46875,
851
  "learning_rate": 4.870879364444109e-08,
852
+ "logits/chosen": 1.300728440284729,
853
+ "logits/rejected": 1.0580918788909912,
854
+ "logps/chosen": -129.29281616210938,
855
+ "logps/rejected": -178.3690948486328,
856
+ "loss": 0.6111,
857
  "pred_label": 0.0,
858
+ "rewards/accuracies": 0.3499999940395355,
859
+ "rewards/chosen": -0.570349931716919,
860
+ "rewards/margins": 0.3304445147514343,
861
+ "rewards/rejected": -0.9007943868637085,
862
  "step": 450,
863
  "use_label": 0.0
864
  },
865
  {
866
  "epoch": 0.96,
867
+ "grad_norm": 1.8359375,
868
  "learning_rate": 1.93478202307823e-08,
869
+ "logits/chosen": 1.1906068325042725,
870
+ "logits/rejected": 1.2149587869644165,
871
+ "logps/chosen": -83.74864196777344,
872
+ "logps/rejected": -130.91348266601562,
873
+ "loss": 0.6154,
874
  "pred_label": 0.0,
875
+ "rewards/accuracies": 0.3375000059604645,
876
+ "rewards/chosen": -0.3762877583503723,
877
+ "rewards/margins": 0.2993956208229065,
878
+ "rewards/rejected": -0.6756833791732788,
879
  "step": 460,
880
  "use_label": 0.0
881
  },
882
  {
883
  "epoch": 0.98,
884
+ "grad_norm": 2.375,
885
  "learning_rate": 3.283947088983663e-09,
886
+ "logits/chosen": 1.1844379901885986,
887
+ "logits/rejected": 0.9474547505378723,
888
+ "logps/chosen": -113.1079330444336,
889
+ "logps/rejected": -141.49147033691406,
890
+ "loss": 0.6213,
891
  "pred_label": 0.0,
892
  "rewards/accuracies": 0.3125,
893
+ "rewards/chosen": -0.4577876627445221,
894
+ "rewards/margins": 0.26655709743499756,
895
+ "rewards/rejected": -0.7243447303771973,
896
  "step": 470,
897
  "use_label": 0.0
898
  },
 
900
  "epoch": 1.0,
901
  "step": 477,
902
  "total_flos": 0.0,
903
+ "train_loss": 0.6357159084743924,
904
+ "train_runtime": 9601.7268,
905
+ "train_samples_per_second": 6.367,
906
  "train_steps_per_second": 0.05
907
  }
908
  ],