nnheui committed on
Commit
c3d5b59
1 Parent(s): 297141c

Model save

Browse files
README.md CHANGED
@@ -1,16 +1,10 @@
1
  ---
 
2
  base_model: nnheui/pythia-1.4b-sft-full
3
  tags:
4
- - alignment-handbook
5
  - trl
6
  - dpo
7
  - generated_from_trainer
8
- - trl
9
- - dpo
10
- - alignment-handbook
11
- - generated_from_trainer
12
- datasets:
13
- - HuggingFaceH4/ultrafeedback_binarized
14
  model-index:
15
  - name: pythia-1.4b-dpo-full
16
  results: []
@@ -21,19 +15,21 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  # pythia-1.4b-dpo-full
23
 
24
- This model is a fine-tuned version of [nnheui/pythia-1.4b-sft-full](https://huggingface.co/nnheui/pythia-1.4b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
25
  It achieves the following results on the evaluation set:
26
- - Loss: 0.6257
27
  - Rewards/chosen: -0.5234
28
- - Rewards/rejected: -0.7812
29
- - Rewards/accuracies: 0.6597
30
- - Rewards/margins: 0.2578
31
  - Logps/rejected: -416.0
32
  - Logps/chosen: -446.0
33
  - Logits/rejected: -1.2422
34
  - Logits/chosen: -1.1953
35
- - Logps/chosen Bottom Tokens: -0.0007
36
- - Logps/rejected Bottom Tokens: -0.0007
 
 
37
 
38
  ## Model description
39
 
@@ -68,13 +64,13 @@ The following hyperparameters were used during training:
68
 
69
  ### Training results
70
 
71
- | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/bottom Tokens | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
72
- |:-------------:|:------:|:----:|:-------------:|:---------------:|:-------------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
73
- | 0.678 | 0.1963 | 100 | -1.0938 | -1.1562 | -0.0009 | -396.0 | -344.0 | 0.6789 | 0.5881 | -0.0275 | 0.0332 | -0.0608 |
74
- | 0.645 | 0.3925 | 200 | -1.1562 | -1.2031 | -0.0009 | -422.0 | -380.0 | 0.6489 | 0.6448 | -0.2871 | 0.1367 | -0.4238 |
75
- | 0.6396 | 0.5888 | 300 | -1.1875 | -1.2344 | -0.0008 | -438.0 | -406.0 | 0.6304 | 0.6627 | -0.4512 | 0.2275 | -0.6797 |
76
- | 0.6102 | 0.7851 | 400 | -1.1875 | -1.2344 | -0.0007 | -444.0 | -414.0 | 0.6268 | 0.6567 | -0.5039 | 0.2578 | -0.7617 |
77
- | 0.6084 | 0.9814 | 500 | -1.1953 | -1.2422 | -0.0007 | -446.0 | -416.0 | 0.6259 | 0.6567 | -0.5234 | 0.2617 | -0.7852 |
78
 
79
 
80
  ### Framework versions
 
1
  ---
2
+ license: apache-2.0
3
  base_model: nnheui/pythia-1.4b-sft-full
4
  tags:
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
 
 
 
 
8
  model-index:
9
  - name: pythia-1.4b-dpo-full
10
  results: []
 
15
 
16
  # pythia-1.4b-dpo-full
17
 
18
+ This model is a fine-tuned version of [nnheui/pythia-1.4b-sft-full](https://huggingface.co/nnheui/pythia-1.4b-sft-full) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.6259
21
  - Rewards/chosen: -0.5234
22
+ - Rewards/rejected: -0.7852
23
+ - Rewards/accuracies: 0.6567
24
+ - Rewards/margins: 0.2617
25
  - Logps/rejected: -416.0
26
  - Logps/chosen: -446.0
27
  - Logits/rejected: -1.2422
28
  - Logits/chosen: -1.1953
29
+ - Logps/chosen Top Tokens: -0.0007
30
+ - Logps/rejected Top Tokens: -0.0007
31
+ - Logps/chosen Bottom Tokens: -14.375
32
+ - Logps/rejected Bottom Tokens: -14.3125
33
 
34
  ## Model description
35
 
 
64
 
65
  ### Training results
66
 
67
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Logps/chosen Top Tokens | Logps/rejected Top Tokens | Logps/chosen Bottom Tokens | Logps/rejected Bottom Tokens |
68
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:-----------------------:|:-------------------------:|:--------------------------:|:----------------------------:|
69
+ | 0.678 | 0.1963 | 100 | 0.6789 | -0.0275 | -0.0608 | 0.5881 | 0.0332 | -344.0 | -396.0 | -1.1562 | -1.0938 | -0.0009 | -0.0009 | -14.0625 | -14.0 |
70
+ | 0.645 | 0.3925 | 200 | 0.6489 | -0.2871 | -0.4238 | 0.6448 | 0.1367 | -380.0 | -422.0 | -1.2031 | -1.1562 | -0.0009 | -0.0009 | -14.375 | -14.3125 |
71
+ | 0.6396 | 0.5888 | 300 | 0.6304 | -0.4512 | -0.6797 | 0.6627 | 0.2275 | -406.0 | -438.0 | -1.2344 | -1.1875 | -0.0007 | -0.0008 | -14.375 | -14.3125 |
72
+ | 0.6102 | 0.7851 | 400 | 0.6268 | -0.5039 | -0.7617 | 0.6567 | 0.2578 | -414.0 | -444.0 | -1.2344 | -1.1875 | -0.0007 | -0.0007 | -14.3125 | -14.25 |
73
+ | 0.6084 | 0.9814 | 500 | 0.6259 | -0.5234 | -0.7852 | 0.6567 | 0.2617 | -416.0 | -446.0 | -1.2422 | -1.1953 | -0.0007 | -0.0007 | -14.375 | -14.3125 |
74
 
75
 
76
  ### Framework versions
all_results.json CHANGED
@@ -1,25 +1,9 @@
1
  {
2
  "epoch": 0.9990186457311089,
3
- "eval_logits/chosen": -1.1953125,
4
- "eval_logits/rejected": -1.2421875,
5
- "eval_logps/bottom_tokens": -0.00072479248046875,
6
- "eval_logps/chosen": -446.0,
7
- "eval_logps/chosen_bottom_tokens": -0.00072479248046875,
8
- "eval_logps/rejected": -416.0,
9
- "eval_logps/rejected_bottom_tokens": -0.000728607177734375,
10
- "eval_loss": 0.6256738305091858,
11
- "eval_rewards/accuracies": 0.6597015857696533,
12
- "eval_rewards/chosen": -0.5234375,
13
- "eval_rewards/margins": 0.2578125,
14
- "eval_rewards/rejected": -0.78125,
15
- "eval_runtime": 103.3574,
16
- "eval_samples": 2000,
17
- "eval_samples_per_second": 19.35,
18
- "eval_steps_per_second": 0.648,
19
  "total_flos": 0.0,
20
- "train_loss": 0.011100004604616897,
21
- "train_runtime": 142.2419,
22
  "train_samples": 61134,
23
- "train_samples_per_second": 429.789,
24
- "train_steps_per_second": 3.578
25
  }
 
1
  {
2
  "epoch": 0.9990186457311089,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
+ "train_loss": 0.6464882252961105,
5
+ "train_runtime": 8284.9703,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 7.379,
8
+ "train_steps_per_second": 0.061
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811b22c69714101dba0a13cf905313113be9cd2fb0da880dd2a6f3f027fb2922
3
  size 2829330208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dee645992f24ee02b486f2e81b344b7a98df284d1f79aa4f2f1679fdd185f99d
3
  size 2829330208
runs/Jul08_12-10-46_42dbe5cf9ed4/events.out.tfevents.1720441204.42dbe5cf9ed4.850889.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:581c4a7ad45f8f9d52a7cde060b05be03accc097a3afeeaacf2ca6a96d1c0d36
3
- size 59325
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b40fca5819f39d3096494f914d8fe42e16c323760be5cc1318d748ca3204d89
3
+ size 59679
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9990186457311089,
3
  "total_flos": 0.0,
4
- "train_loss": 0.011100004604616897,
5
- "train_runtime": 142.2419,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 429.789,
8
- "train_steps_per_second": 3.578
9
  }
 
1
  {
2
  "epoch": 0.9990186457311089,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.6464882252961105,
5
+ "train_runtime": 8284.9703,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 7.379,
8
+ "train_steps_per_second": 0.061
9
  }
trainer_state.json CHANGED
@@ -14,9 +14,12 @@
14
  "learning_rate": 9.803921568627451e-09,
15
  "logits/chosen": -1.125,
16
  "logits/rejected": -1.1875,
17
- "logps/bottom_tokens": -0.000553131103515625,
18
  "logps/chosen": -500.0,
 
 
19
  "logps/rejected": -520.0,
 
 
20
  "loss": 0.6914,
21
  "rewards/accuracies": 0.0,
22
  "rewards/chosen": 0.0,
@@ -30,9 +33,12 @@
30
  "learning_rate": 9.80392156862745e-08,
31
  "logits/chosen": -1.1640625,
32
  "logits/rejected": -1.203125,
33
- "logps/bottom_tokens": -0.00081634521484375,
34
  "logps/chosen": -380.0,
 
 
35
  "logps/rejected": -316.0,
 
 
36
  "loss": 0.6922,
37
  "rewards/accuracies": 0.41111111640930176,
38
  "rewards/chosen": -0.000202178955078125,
@@ -46,9 +52,12 @@
46
  "learning_rate": 1.96078431372549e-07,
47
  "logits/chosen": -1.0859375,
48
  "logits/rejected": -1.1484375,
49
- "logps/bottom_tokens": -0.0008392333984375,
50
  "logps/chosen": -374.0,
 
 
51
  "logps/rejected": -324.0,
 
 
52
  "loss": 0.6913,
53
  "rewards/accuracies": 0.4399999976158142,
54
  "rewards/chosen": 0.003662109375,
@@ -62,9 +71,12 @@
62
  "learning_rate": 2.941176470588235e-07,
63
  "logits/chosen": -1.125,
64
  "logits/rejected": -1.1796875,
65
- "logps/bottom_tokens": -0.000774383544921875,
66
  "logps/chosen": -364.0,
 
 
67
  "logps/rejected": -324.0,
 
 
68
  "loss": 0.6938,
69
  "rewards/accuracies": 0.3850000202655792,
70
  "rewards/chosen": -0.0030670166015625,
@@ -78,9 +90,12 @@
78
  "learning_rate": 3.92156862745098e-07,
79
  "logits/chosen": -1.125,
80
  "logits/rejected": -1.15625,
81
- "logps/bottom_tokens": -0.000782012939453125,
82
  "logps/chosen": -378.0,
 
 
83
  "logps/rejected": -338.0,
 
 
84
  "loss": 0.6929,
85
  "rewards/accuracies": 0.445000022649765,
86
  "rewards/chosen": 0.000881195068359375,
@@ -94,9 +109,12 @@
94
  "learning_rate": 4.901960784313725e-07,
95
  "logits/chosen": -1.1171875,
96
  "logits/rejected": -1.1484375,
97
- "logps/bottom_tokens": -0.000789642333984375,
98
  "logps/chosen": -406.0,
 
 
99
  "logps/rejected": -352.0,
 
 
100
  "loss": 0.6915,
101
  "rewards/accuracies": 0.5400000214576721,
102
  "rewards/chosen": 0.0026397705078125,
@@ -110,9 +128,12 @@
110
  "learning_rate": 4.995237599803335e-07,
111
  "logits/chosen": -1.140625,
112
  "logits/rejected": -1.203125,
113
- "logps/bottom_tokens": -0.000797271728515625,
114
  "logps/chosen": -406.0,
 
 
115
  "logps/rejected": -322.0,
 
 
116
  "loss": 0.6913,
117
  "rewards/accuracies": 0.42500001192092896,
118
  "rewards/chosen": 0.00244140625,
@@ -126,9 +147,12 @@
126
  "learning_rate": 4.978798275112142e-07,
127
  "logits/chosen": -1.09375,
128
  "logits/rejected": -1.1328125,
129
- "logps/bottom_tokens": -0.00078582763671875,
130
  "logps/chosen": -372.0,
 
 
131
  "logps/rejected": -330.0,
 
 
132
  "loss": 0.688,
133
  "rewards/accuracies": 0.5049999952316284,
134
  "rewards/chosen": 0.00897216796875,
@@ -142,9 +166,12 @@
142
  "learning_rate": 4.950700530747689e-07,
143
  "logits/chosen": -1.078125,
144
  "logits/rejected": -1.1484375,
145
- "logps/bottom_tokens": -0.000904083251953125,
146
  "logps/chosen": -378.0,
 
 
147
  "logps/rejected": -308.0,
 
 
148
  "loss": 0.685,
149
  "rewards/accuracies": 0.5450000166893005,
150
  "rewards/chosen": -0.00121307373046875,
@@ -158,9 +185,12 @@
158
  "learning_rate": 4.911076517558622e-07,
159
  "logits/chosen": -1.125,
160
  "logits/rejected": -1.15625,
161
- "logps/bottom_tokens": -0.000835418701171875,
162
  "logps/chosen": -382.0,
 
 
163
  "logps/rejected": -346.0,
 
 
164
  "loss": 0.6832,
165
  "rewards/accuracies": 0.5600000023841858,
166
  "rewards/chosen": -0.0106201171875,
@@ -174,9 +204,12 @@
174
  "learning_rate": 4.860112597371772e-07,
175
  "logits/chosen": -1.125,
176
  "logits/rejected": -1.171875,
177
- "logps/bottom_tokens": -0.0009002685546875,
178
  "logps/chosen": -372.0,
 
 
179
  "logps/rejected": -328.0,
 
 
180
  "loss": 0.678,
181
  "rewards/accuracies": 0.699999988079071,
182
  "rewards/chosen": -0.017333984375,
@@ -188,17 +221,20 @@
188
  "epoch": 0.19627085377821393,
189
  "eval_logits/chosen": -1.09375,
190
  "eval_logits/rejected": -1.15625,
191
- "eval_logps/bottom_tokens": -0.0008697509765625,
192
  "eval_logps/chosen": -396.0,
 
 
193
  "eval_logps/rejected": -344.0,
 
 
194
  "eval_loss": 0.6789160370826721,
195
  "eval_rewards/accuracies": 0.5880597233772278,
196
  "eval_rewards/chosen": -0.0274658203125,
197
  "eval_rewards/margins": 0.033203125,
198
  "eval_rewards/rejected": -0.060791015625,
199
- "eval_runtime": 94.4387,
200
- "eval_samples_per_second": 21.178,
201
- "eval_steps_per_second": 0.709,
202
  "step": 100
203
  },
204
  {
@@ -207,9 +243,12 @@
207
  "learning_rate": 4.798048466485017e-07,
208
  "logits/chosen": -1.1015625,
209
  "logits/rejected": -1.109375,
210
- "logps/bottom_tokens": -0.0008544921875,
211
  "logps/chosen": -344.0,
 
 
212
  "logps/rejected": -332.0,
 
 
213
  "loss": 0.6804,
214
  "rewards/accuracies": 0.5899999737739563,
215
  "rewards/chosen": -0.037109375,
@@ -223,9 +262,12 @@
223
  "learning_rate": 4.725176028314541e-07,
224
  "logits/chosen": -1.109375,
225
  "logits/rejected": -1.1171875,
226
- "logps/bottom_tokens": -0.0008544921875,
227
  "logps/chosen": -372.0,
 
 
228
  "logps/rejected": -354.0,
 
 
229
  "loss": 0.6745,
230
  "rewards/accuracies": 0.6399999856948853,
231
  "rewards/chosen": -0.03564453125,
@@ -239,9 +281,12 @@
239
  "learning_rate": 4.641838020498713e-07,
240
  "logits/chosen": -1.09375,
241
  "logits/rejected": -1.1640625,
242
- "logps/bottom_tokens": -0.000926971435546875,
243
  "logps/chosen": -408.0,
 
 
244
  "logps/rejected": -338.0,
 
 
245
  "loss": 0.6674,
246
  "rewards/accuracies": 0.5849999785423279,
247
  "rewards/chosen": -0.0703125,
@@ -255,9 +300,12 @@
255
  "learning_rate": 4.5484264029156733e-07,
256
  "logits/chosen": -1.1015625,
257
  "logits/rejected": -1.1484375,
258
- "logps/bottom_tokens": -0.000823974609375,
259
  "logps/chosen": -386.0,
 
 
260
  "logps/rejected": -336.0,
 
 
261
  "loss": 0.6635,
262
  "rewards/accuracies": 0.5900000333786011,
263
  "rewards/chosen": -0.1015625,
@@ -271,9 +319,12 @@
271
  "learning_rate": 4.445380514196192e-07,
272
  "logits/chosen": -1.09375,
273
  "logits/rejected": -1.171875,
274
- "logps/bottom_tokens": -0.0008697509765625,
275
  "logps/chosen": -428.0,
 
 
276
  "logps/rejected": -356.0,
 
 
277
  "loss": 0.668,
278
  "rewards/accuracies": 0.6350000500679016,
279
  "rewards/chosen": -0.12890625,
@@ -287,9 +338,12 @@
287
  "learning_rate": 4.33318500540218e-07,
288
  "logits/chosen": -1.0859375,
289
  "logits/rejected": -1.1328125,
290
- "logps/bottom_tokens": -0.000896453857421875,
291
  "logps/chosen": -408.0,
 
 
292
  "logps/rejected": -368.0,
 
 
293
  "loss": 0.6655,
294
  "rewards/accuracies": 0.6050000190734863,
295
  "rewards/chosen": -0.16796875,
@@ -303,9 +357,12 @@
303
  "learning_rate": 4.2123675605892985e-07,
304
  "logits/chosen": -1.078125,
305
  "logits/rejected": -1.1484375,
306
- "logps/bottom_tokens": -0.00099945068359375,
307
  "logps/chosen": -422.0,
 
 
308
  "logps/rejected": -364.0,
 
 
309
  "loss": 0.6585,
310
  "rewards/accuracies": 0.6450000405311584,
311
  "rewards/chosen": -0.1826171875,
@@ -319,9 +376,12 @@
319
  "learning_rate": 4.0834964149744333e-07,
320
  "logits/chosen": -1.1015625,
321
  "logits/rejected": -1.15625,
322
- "logps/bottom_tokens": -0.00096893310546875,
323
  "logps/chosen": -416.0,
 
 
324
  "logps/rejected": -380.0,
 
 
325
  "loss": 0.6643,
326
  "rewards/accuracies": 0.6100000143051147,
327
  "rewards/chosen": -0.232421875,
@@ -335,9 +395,12 @@
335
  "learning_rate": 3.947177682380738e-07,
336
  "logits/chosen": -1.1796875,
337
  "logits/rejected": -1.203125,
338
- "logps/bottom_tokens": -0.000804901123046875,
339
  "logps/chosen": -378.0,
 
 
340
  "logps/rejected": -356.0,
 
 
341
  "loss": 0.6499,
342
  "rewards/accuracies": 0.6450001001358032,
343
  "rewards/chosen": -0.2412109375,
@@ -351,9 +414,12 @@
351
  "learning_rate": 3.804052504529933e-07,
352
  "logits/chosen": -1.1796875,
353
  "logits/rejected": -1.2265625,
354
- "logps/bottom_tokens": -0.000858306884765625,
355
  "logps/chosen": -392.0,
 
 
356
  "logps/rejected": -370.0,
 
 
357
  "loss": 0.645,
358
  "rewards/accuracies": 0.6350000500679016,
359
  "rewards/chosen": -0.271484375,
@@ -365,17 +431,20 @@
365
  "epoch": 0.39254170755642787,
366
  "eval_logits/chosen": -1.15625,
367
  "eval_logits/rejected": -1.203125,
368
- "eval_logps/bottom_tokens": -0.00091552734375,
369
  "eval_logps/chosen": -422.0,
 
 
370
  "eval_logps/rejected": -380.0,
 
 
371
  "eval_loss": 0.6488671898841858,
372
  "eval_rewards/accuracies": 0.6447761058807373,
373
  "eval_rewards/chosen": -0.287109375,
374
  "eval_rewards/margins": 0.13671875,
375
  "eval_rewards/rejected": -0.423828125,
376
- "eval_runtime": 94.2281,
377
- "eval_samples_per_second": 21.225,
378
- "eval_steps_per_second": 0.711,
379
  "step": 200
380
  },
381
  {
@@ -384,9 +453,12 @@
384
  "learning_rate": 3.654794035589483e-07,
385
  "logits/chosen": -1.1328125,
386
  "logits/rejected": -1.1640625,
387
- "logps/bottom_tokens": -0.00092315673828125,
388
  "logps/chosen": -362.0,
 
 
389
  "logps/rejected": -344.0,
 
 
390
  "loss": 0.6512,
391
  "rewards/accuracies": 0.6149999499320984,
392
  "rewards/chosen": -0.298828125,
@@ -400,9 +472,12 @@
400
  "learning_rate": 3.5001042761570826e-07,
401
  "logits/chosen": -1.171875,
402
  "logits/rejected": -1.2109375,
403
- "logps/bottom_tokens": -0.000751495361328125,
404
  "logps/chosen": -414.0,
 
 
405
  "logps/rejected": -398.0,
 
 
406
  "loss": 0.6507,
407
  "rewards/accuracies": 0.5800000429153442,
408
  "rewards/chosen": -0.333984375,
@@ -416,9 +491,12 @@
416
  "learning_rate": 3.34071077157304e-07,
417
  "logits/chosen": -1.171875,
418
  "logits/rejected": -1.2265625,
419
- "logps/bottom_tokens": -0.000789642333984375,
420
  "logps/chosen": -388.0,
 
 
421
  "logps/rejected": -354.0,
 
 
422
  "loss": 0.6464,
423
  "rewards/accuracies": 0.6299999952316284,
424
  "rewards/chosen": -0.353515625,
@@ -432,9 +510,12 @@
432
  "learning_rate": 3.1773631900892204e-07,
433
  "logits/chosen": -1.1484375,
434
  "logits/rejected": -1.1875,
435
- "logps/bottom_tokens": -0.00077056884765625,
436
  "logps/chosen": -416.0,
 
 
437
  "logps/rejected": -396.0,
 
 
438
  "loss": 0.6442,
439
  "rewards/accuracies": 0.6200000047683716,
440
  "rewards/chosen": -0.38671875,
@@ -448,9 +529,12 @@
448
  "learning_rate": 3.0108297969883103e-07,
449
  "logits/chosen": -1.1640625,
450
  "logits/rejected": -1.1953125,
451
- "logps/bottom_tokens": -0.000827789306640625,
452
  "logps/chosen": -426.0,
 
 
453
  "logps/rejected": -398.0,
 
 
454
  "loss": 0.6347,
455
  "rewards/accuracies": 0.64000004529953,
456
  "rewards/chosen": -0.39453125,
@@ -464,9 +548,12 @@
464
  "learning_rate": 2.8418938412365013e-07,
465
  "logits/chosen": -1.1640625,
466
  "logits/rejected": -1.203125,
467
- "logps/bottom_tokens": -0.000873565673828125,
468
  "logps/chosen": -396.0,
 
 
469
  "logps/rejected": -372.0,
 
 
470
  "loss": 0.6381,
471
  "rewards/accuracies": 0.6350000500679016,
472
  "rewards/chosen": -0.373046875,
@@ -480,9 +567,12 @@
480
  "learning_rate": 2.671349871664101e-07,
481
  "logits/chosen": -1.1640625,
482
  "logits/rejected": -1.171875,
483
- "logps/bottom_tokens": -0.0007781982421875,
484
  "logps/chosen": -398.0,
 
 
485
  "logps/rejected": -386.0,
 
 
486
  "loss": 0.6315,
487
  "rewards/accuracies": 0.64000004529953,
488
  "rewards/chosen": -0.40625,
@@ -496,9 +586,12 @@
496
  "learning_rate": 2.5e-07,
497
  "logits/chosen": -1.171875,
498
  "logits/rejected": -1.1953125,
499
- "logps/bottom_tokens": -0.000904083251953125,
500
  "logps/chosen": -438.0,
 
 
501
  "logps/rejected": -402.0,
 
 
502
  "loss": 0.6384,
503
  "rewards/accuracies": 0.6600000262260437,
504
  "rewards/chosen": -0.42578125,
@@ -512,9 +605,12 @@
512
  "learning_rate": 2.3286501283358982e-07,
513
  "logits/chosen": -1.1796875,
514
  "logits/rejected": -1.2421875,
515
- "logps/bottom_tokens": -0.00086212158203125,
516
  "logps/chosen": -412.0,
 
 
517
  "logps/rejected": -376.0,
 
 
518
  "loss": 0.632,
519
  "rewards/accuracies": 0.5750000476837158,
520
  "rewards/chosen": -0.4609375,
@@ -528,9 +624,12 @@
528
  "learning_rate": 2.1581061587634987e-07,
529
  "logits/chosen": -1.203125,
530
  "logits/rejected": -1.2421875,
531
- "logps/bottom_tokens": -0.000774383544921875,
532
  "logps/chosen": -428.0,
 
 
533
  "logps/rejected": -388.0,
 
 
534
  "loss": 0.6396,
535
  "rewards/accuracies": 0.5999999642372131,
536
  "rewards/chosen": -0.482421875,
@@ -542,17 +641,20 @@
542
  "epoch": 0.5888125613346418,
543
  "eval_logits/chosen": -1.1875,
544
  "eval_logits/rejected": -1.234375,
545
- "eval_logps/bottom_tokens": -0.000751495361328125,
546
  "eval_logps/chosen": -438.0,
 
 
547
  "eval_logps/rejected": -406.0,
 
 
548
  "eval_loss": 0.6303857564926147,
549
  "eval_rewards/accuracies": 0.6626865863800049,
550
  "eval_rewards/chosen": -0.451171875,
551
  "eval_rewards/margins": 0.2275390625,
552
  "eval_rewards/rejected": -0.6796875,
553
- "eval_runtime": 94.2601,
554
- "eval_samples_per_second": 21.218,
555
- "eval_steps_per_second": 0.711,
556
  "step": 300
557
  },
558
  {
@@ -561,9 +663,12 @@
561
  "learning_rate": 1.9891702030116897e-07,
562
  "logits/chosen": -1.140625,
563
  "logits/rejected": -1.2421875,
564
- "logps/bottom_tokens": -0.000728607177734375,
565
  "logps/chosen": -446.0,
 
 
566
  "logps/rejected": -358.0,
 
 
567
  "loss": 0.6234,
568
  "rewards/accuracies": 0.6949999928474426,
569
  "rewards/chosen": -0.408203125,
@@ -577,9 +682,12 @@
577
  "learning_rate": 1.8226368099107792e-07,
578
  "logits/chosen": -1.1640625,
579
  "logits/rejected": -1.2109375,
580
- "logps/bottom_tokens": -0.000823974609375,
581
  "logps/chosen": -424.0,
 
 
582
  "logps/rejected": -364.0,
 
 
583
  "loss": 0.6241,
584
  "rewards/accuracies": 0.675000011920929,
585
  "rewards/chosen": -0.42578125,
@@ -593,9 +701,12 @@
593
  "learning_rate": 1.6592892284269594e-07,
594
  "logits/chosen": -1.1796875,
595
  "logits/rejected": -1.2109375,
596
- "logps/bottom_tokens": -0.00072479248046875,
597
  "logps/chosen": -408.0,
 
 
598
  "logps/rejected": -386.0,
 
 
599
  "loss": 0.6224,
600
  "rewards/accuracies": 0.6799999475479126,
601
  "rewards/chosen": -0.431640625,
@@ -609,9 +720,12 @@
609
  "learning_rate": 1.4998957238429172e-07,
610
  "logits/chosen": -1.21875,
611
  "logits/rejected": -1.2421875,
612
- "logps/bottom_tokens": -0.000789642333984375,
613
  "logps/chosen": -408.0,
 
 
614
  "logps/rejected": -380.0,
 
 
615
  "loss": 0.6204,
616
  "rewards/accuracies": 0.6300000548362732,
617
  "rewards/chosen": -0.5078125,
@@ -625,9 +739,12 @@
625
  "learning_rate": 1.345205964410517e-07,
626
  "logits/chosen": -1.1953125,
627
  "logits/rejected": -1.21875,
628
- "logps/bottom_tokens": -0.00087738037109375,
629
  "logps/chosen": -392.0,
 
 
630
  "logps/rejected": -372.0,
 
 
631
  "loss": 0.627,
632
  "rewards/accuracies": 0.5850000381469727,
633
  "rewards/chosen": -0.49609375,
@@ -641,9 +758,12 @@
641
  "learning_rate": 1.1959474954700665e-07,
642
  "logits/chosen": -1.1796875,
643
  "logits/rejected": -1.21875,
644
- "logps/bottom_tokens": -0.00067138671875,
645
  "logps/chosen": -424.0,
 
 
646
  "logps/rejected": -416.0,
 
 
647
  "loss": 0.613,
648
  "rewards/accuracies": 0.7049999833106995,
649
  "rewards/chosen": -0.44140625,
@@ -657,9 +777,12 @@
657
  "learning_rate": 1.0528223176192615e-07,
658
  "logits/chosen": -1.1953125,
659
  "logits/rejected": -1.234375,
660
- "logps/bottom_tokens": -0.0006866455078125,
661
  "logps/chosen": -442.0,
 
 
662
  "logps/rejected": -398.0,
 
 
663
  "loss": 0.6218,
664
  "rewards/accuracies": 0.6250000596046448,
665
  "rewards/chosen": -0.5234375,
@@ -673,9 +796,12 @@
673
  "learning_rate": 9.16503585025567e-08,
674
  "logits/chosen": -1.1953125,
675
  "logits/rejected": -1.21875,
676
- "logps/bottom_tokens": -0.0009765625,
677
  "logps/chosen": -420.0,
 
 
678
  "logps/rejected": -412.0,
 
 
679
  "loss": 0.6279,
680
  "rewards/accuracies": 0.6499999761581421,
681
  "rewards/chosen": -0.5,
@@ -689,9 +815,12 @@
689
  "learning_rate": 7.876324394107017e-08,
690
  "logits/chosen": -1.15625,
691
  "logits/rejected": -1.203125,
692
- "logps/bottom_tokens": -0.0006866455078125,
693
  "logps/chosen": -442.0,
 
 
694
  "logps/rejected": -418.0,
 
 
695
  "loss": 0.6289,
696
  "rewards/accuracies": 0.6350000500679016,
697
  "rewards/chosen": -0.50390625,
@@ -705,9 +834,12 @@
705
  "learning_rate": 6.668149945978201e-08,
706
  "logits/chosen": -1.1953125,
707
  "logits/rejected": -1.2265625,
708
- "logps/bottom_tokens": -0.000720977783203125,
709
  "logps/chosen": -440.0,
 
 
710
  "logps/rejected": -420.0,
 
 
711
  "loss": 0.6102,
712
  "rewards/accuracies": 0.6700000166893005,
713
  "rewards/chosen": -0.482421875,
@@ -719,17 +851,20 @@
719
  "epoch": 0.7850834151128557,
720
  "eval_logits/chosen": -1.1875,
721
  "eval_logits/rejected": -1.234375,
722
- "eval_logps/bottom_tokens": -0.000667572021484375,
723
  "eval_logps/chosen": -444.0,
 
 
724
  "eval_logps/rejected": -414.0,
 
 
725
  "eval_loss": 0.6267920136451721,
726
  "eval_rewards/accuracies": 0.6567164063453674,
727
  "eval_rewards/chosen": -0.50390625,
728
  "eval_rewards/margins": 0.2578125,
729
  "eval_rewards/rejected": -0.76171875,
730
- "eval_runtime": 94.2884,
731
- "eval_samples_per_second": 21.212,
732
- "eval_steps_per_second": 0.711,
733
  "step": 400
734
  },
735
  {
@@ -738,9 +873,12 @@
738
  "learning_rate": 5.546194858038072e-08,
739
  "logits/chosen": -1.171875,
740
  "logits/rejected": -1.21875,
741
- "logps/bottom_tokens": -0.000797271728515625,
742
  "logps/chosen": -416.0,
 
 
743
  "logps/rejected": -374.0,
 
 
744
  "loss": 0.6227,
745
  "rewards/accuracies": 0.6699999570846558,
746
  "rewards/chosen": -0.51171875,
@@ -754,9 +892,12 @@
754
  "learning_rate": 4.5157359708432626e-08,
755
  "logits/chosen": -1.1953125,
756
  "logits/rejected": -1.2265625,
757
- "logps/bottom_tokens": -0.000751495361328125,
758
  "logps/chosen": -394.0,
 
 
759
  "logps/rejected": -412.0,
 
 
760
  "loss": 0.6205,
761
  "rewards/accuracies": 0.625,
762
  "rewards/chosen": -0.474609375,
@@ -770,9 +911,12 @@
770
  "learning_rate": 3.581619795012874e-08,
771
  "logits/chosen": -1.1796875,
772
  "logits/rejected": -1.1875,
773
- "logps/bottom_tokens": -0.000762939453125,
774
  "logps/chosen": -400.0,
 
 
775
  "logps/rejected": -404.0,
 
 
776
  "loss": 0.6208,
777
  "rewards/accuracies": 0.675000011920929,
778
  "rewards/chosen": -0.474609375,
@@ -786,9 +930,12 @@
786
  "learning_rate": 2.748239716854589e-08,
787
  "logits/chosen": -1.2109375,
788
  "logits/rejected": -1.1953125,
789
- "logps/bottom_tokens": -0.00074005126953125,
790
  "logps/chosen": -424.0,
 
 
791
  "logps/rejected": -420.0,
 
 
792
  "loss": 0.6398,
793
  "rewards/accuracies": 0.5849999785423279,
794
  "rewards/chosen": -0.51171875,
@@ -802,9 +949,12 @@
802
  "learning_rate": 2.0195153351498323e-08,
803
  "logits/chosen": -1.1796875,
804
  "logits/rejected": -1.2109375,
805
- "logps/bottom_tokens": -0.0007171630859375,
806
  "logps/chosen": -432.0,
 
 
807
  "logps/rejected": -420.0,
 
 
808
  "loss": 0.611,
809
  "rewards/accuracies": 0.6149999499320984,
810
  "rewards/chosen": -0.53125,
@@ -818,9 +968,12 @@
818
  "learning_rate": 1.3988740262822846e-08,
819
  "logits/chosen": -1.1953125,
820
  "logits/rejected": -1.203125,
821
- "logps/bottom_tokens": -0.00067138671875,
822
  "logps/chosen": -428.0,
 
 
823
  "logps/rejected": -410.0,
 
 
824
  "loss": 0.6138,
825
  "rewards/accuracies": 0.6349999904632568,
826
  "rewards/chosen": -0.490234375,
@@ -834,9 +987,12 @@
834
  "learning_rate": 8.892348244137788e-09,
835
  "logits/chosen": -1.1875,
836
  "logits/rejected": -1.2421875,
837
- "logps/bottom_tokens": -0.00066375732421875,
838
  "logps/chosen": -474.0,
 
 
839
  "logps/rejected": -444.0,
 
 
840
  "loss": 0.6106,
841
  "rewards/accuracies": 0.6299999952316284,
842
  "rewards/chosen": -0.494140625,
@@ -850,9 +1006,12 @@
850
  "learning_rate": 4.929946925231076e-09,
851
  "logits/chosen": -1.1328125,
852
  "logits/rejected": -1.171875,
853
- "logps/bottom_tokens": -0.000850677490234375,
854
  "logps/chosen": -410.0,
 
 
855
  "logps/rejected": -412.0,
 
 
856
  "loss": 0.6203,
857
  "rewards/accuracies": 0.6049999594688416,
858
  "rewards/chosen": -0.48046875,
@@ -866,9 +1025,12 @@
866
  "learning_rate": 2.1201724887858484e-09,
867
  "logits/chosen": -1.1640625,
868
  "logits/rejected": -1.171875,
869
- "logps/bottom_tokens": -0.000732421875,
870
  "logps/chosen": -422.0,
 
 
871
  "logps/rejected": -412.0,
 
 
872
  "loss": 0.6235,
873
  "rewards/accuracies": 0.5949999690055847,
874
  "rewards/chosen": -0.5390625,
@@ -882,9 +1044,12 @@
882
  "learning_rate": 4.762400196664518e-10,
883
  "logits/chosen": -1.1484375,
884
  "logits/rejected": -1.1953125,
885
- "logps/bottom_tokens": -0.0006256103515625,
886
  "logps/chosen": -428.0,
 
 
887
  "logps/rejected": -388.0,
 
 
888
  "loss": 0.6084,
889
  "rewards/accuracies": 0.625,
890
  "rewards/chosen": -0.578125,
@@ -896,27 +1061,30 @@
896
  "epoch": 0.9813542688910697,
897
  "eval_logits/chosen": -1.1953125,
898
  "eval_logits/rejected": -1.2421875,
899
- "eval_logps/bottom_tokens": -0.000743865966796875,
900
  "eval_logps/chosen": -446.0,
 
 
901
  "eval_logps/rejected": -416.0,
 
 
902
  "eval_loss": 0.6259472370147705,
903
  "eval_rewards/accuracies": 0.6567164659500122,
904
  "eval_rewards/chosen": -0.5234375,
905
  "eval_rewards/margins": 0.26171875,
906
  "eval_rewards/rejected": -0.78515625,
907
- "eval_runtime": 94.3436,
908
- "eval_samples_per_second": 21.199,
909
- "eval_steps_per_second": 0.71,
910
  "step": 500
911
  },
912
  {
913
  "epoch": 0.9990186457311089,
914
  "step": 509,
915
  "total_flos": 0.0,
916
- "train_loss": 0.011100004604616897,
917
- "train_runtime": 142.2419,
918
- "train_samples_per_second": 429.789,
919
- "train_steps_per_second": 3.578
920
  }
921
  ],
922
  "logging_steps": 10,
 
14
  "learning_rate": 9.803921568627451e-09,
15
  "logits/chosen": -1.125,
16
  "logits/rejected": -1.1875,
 
17
  "logps/chosen": -500.0,
18
+ "logps/chosen_bottom_tokens": -14.5,
19
+ "logps/chosen_top_tokens": -0.0005645751953125,
20
  "logps/rejected": -520.0,
21
+ "logps/rejected_bottom_tokens": -13.9375,
22
+ "logps/rejected_top_tokens": -0.00054168701171875,
23
  "loss": 0.6914,
24
  "rewards/accuracies": 0.0,
25
  "rewards/chosen": 0.0,
 
33
  "learning_rate": 9.80392156862745e-08,
34
  "logits/chosen": -1.1640625,
35
  "logits/rejected": -1.203125,
 
36
  "logps/chosen": -380.0,
37
+ "logps/chosen_bottom_tokens": -14.125,
38
+ "logps/chosen_top_tokens": -0.000804901123046875,
39
  "logps/rejected": -316.0,
40
+ "logps/rejected_bottom_tokens": -14.125,
41
+ "logps/rejected_top_tokens": -0.000827789306640625,
42
  "loss": 0.6922,
43
  "rewards/accuracies": 0.41111111640930176,
44
  "rewards/chosen": -0.000202178955078125,
 
52
  "learning_rate": 1.96078431372549e-07,
53
  "logits/chosen": -1.0859375,
54
  "logits/rejected": -1.1484375,
 
55
  "logps/chosen": -374.0,
56
+ "logps/chosen_bottom_tokens": -14.125,
57
+ "logps/chosen_top_tokens": -0.000835418701171875,
58
  "logps/rejected": -324.0,
59
+ "logps/rejected_bottom_tokens": -14.0625,
60
+ "logps/rejected_top_tokens": -0.00084686279296875,
61
  "loss": 0.6913,
62
  "rewards/accuracies": 0.4399999976158142,
63
  "rewards/chosen": 0.003662109375,
 
71
  "learning_rate": 2.941176470588235e-07,
72
  "logits/chosen": -1.125,
73
  "logits/rejected": -1.1796875,
 
74
  "logps/chosen": -364.0,
75
+ "logps/chosen_bottom_tokens": -14.25,
76
+ "logps/chosen_top_tokens": -0.000762939453125,
77
  "logps/rejected": -324.0,
78
+ "logps/rejected_bottom_tokens": -14.125,
79
+ "logps/rejected_top_tokens": -0.00078582763671875,
80
  "loss": 0.6938,
81
  "rewards/accuracies": 0.3850000202655792,
82
  "rewards/chosen": -0.0030670166015625,
 
90
  "learning_rate": 3.92156862745098e-07,
91
  "logits/chosen": -1.125,
92
  "logits/rejected": -1.15625,
 
93
  "logps/chosen": -378.0,
94
+ "logps/chosen_bottom_tokens": -14.1875,
95
+ "logps/chosen_top_tokens": -0.000759124755859375,
96
  "logps/rejected": -338.0,
97
+ "logps/rejected_bottom_tokens": -14.125,
98
+ "logps/rejected_top_tokens": -0.000804901123046875,
99
  "loss": 0.6929,
100
  "rewards/accuracies": 0.445000022649765,
101
  "rewards/chosen": 0.000881195068359375,
 
109
  "learning_rate": 4.901960784313725e-07,
110
  "logits/chosen": -1.1171875,
111
  "logits/rejected": -1.1484375,
 
112
  "logps/chosen": -406.0,
113
+ "logps/chosen_bottom_tokens": -14.0625,
114
+ "logps/chosen_top_tokens": -0.000774383544921875,
115
  "logps/rejected": -352.0,
116
+ "logps/rejected_bottom_tokens": -14.0,
117
+ "logps/rejected_top_tokens": -0.00080108642578125,
118
  "loss": 0.6915,
119
  "rewards/accuracies": 0.5400000214576721,
120
  "rewards/chosen": 0.0026397705078125,
 
128
  "learning_rate": 4.995237599803335e-07,
129
  "logits/chosen": -1.140625,
130
  "logits/rejected": -1.203125,
 
131
  "logps/chosen": -406.0,
132
+ "logps/chosen_bottom_tokens": -14.1875,
133
+ "logps/chosen_top_tokens": -0.000782012939453125,
134
  "logps/rejected": -322.0,
135
+ "logps/rejected_bottom_tokens": -14.0625,
136
+ "logps/rejected_top_tokens": -0.0008087158203125,
137
  "loss": 0.6913,
138
  "rewards/accuracies": 0.42500001192092896,
139
  "rewards/chosen": 0.00244140625,
 
147
  "learning_rate": 4.978798275112142e-07,
148
  "logits/chosen": -1.09375,
149
  "logits/rejected": -1.1328125,
 
150
  "logps/chosen": -372.0,
151
+ "logps/chosen_bottom_tokens": -14.0625,
152
+ "logps/chosen_top_tokens": -0.00078582763671875,
153
  "logps/rejected": -330.0,
154
+ "logps/rejected_bottom_tokens": -14.0625,
155
+ "logps/rejected_top_tokens": -0.000789642333984375,
156
  "loss": 0.688,
157
  "rewards/accuracies": 0.5049999952316284,
158
  "rewards/chosen": 0.00897216796875,
 
166
  "learning_rate": 4.950700530747689e-07,
167
  "logits/chosen": -1.078125,
168
  "logits/rejected": -1.1484375,
 
169
  "logps/chosen": -378.0,
170
+ "logps/chosen_bottom_tokens": -14.0,
171
+ "logps/chosen_top_tokens": -0.000934600830078125,
172
  "logps/rejected": -308.0,
173
+ "logps/rejected_bottom_tokens": -14.0,
174
+ "logps/rejected_top_tokens": -0.00087738037109375,
175
  "loss": 0.685,
176
  "rewards/accuracies": 0.5450000166893005,
177
  "rewards/chosen": -0.00121307373046875,
 
185
  "learning_rate": 4.911076517558622e-07,
186
  "logits/chosen": -1.125,
187
  "logits/rejected": -1.15625,
 
188
  "logps/chosen": -382.0,
189
+ "logps/chosen_bottom_tokens": -14.125,
190
+ "logps/chosen_top_tokens": -0.000823974609375,
191
  "logps/rejected": -346.0,
192
+ "logps/rejected_bottom_tokens": -14.0625,
193
+ "logps/rejected_top_tokens": -0.00084686279296875,
194
  "loss": 0.6832,
195
  "rewards/accuracies": 0.5600000023841858,
196
  "rewards/chosen": -0.0106201171875,
 
204
  "learning_rate": 4.860112597371772e-07,
205
  "logits/chosen": -1.125,
206
  "logits/rejected": -1.171875,
 
207
  "logps/chosen": -372.0,
208
+ "logps/chosen_bottom_tokens": -14.125,
209
+ "logps/chosen_top_tokens": -0.000904083251953125,
210
  "logps/rejected": -328.0,
211
+ "logps/rejected_bottom_tokens": -14.0,
212
+ "logps/rejected_top_tokens": -0.0009002685546875,
213
  "loss": 0.678,
214
  "rewards/accuracies": 0.699999988079071,
215
  "rewards/chosen": -0.017333984375,
 
221
  "epoch": 0.19627085377821393,
222
  "eval_logits/chosen": -1.09375,
223
  "eval_logits/rejected": -1.15625,
 
224
  "eval_logps/chosen": -396.0,
225
+ "eval_logps/chosen_bottom_tokens": -14.0625,
226
+ "eval_logps/chosen_top_tokens": -0.0008697509765625,
227
  "eval_logps/rejected": -344.0,
228
+ "eval_logps/rejected_bottom_tokens": -14.0,
229
+ "eval_logps/rejected_top_tokens": -0.0008697509765625,
230
  "eval_loss": 0.6789160370826721,
231
  "eval_rewards/accuracies": 0.5880597233772278,
232
  "eval_rewards/chosen": -0.0274658203125,
233
  "eval_rewards/margins": 0.033203125,
234
  "eval_rewards/rejected": -0.060791015625,
235
+ "eval_runtime": 111.5869,
236
+ "eval_samples_per_second": 17.923,
237
+ "eval_steps_per_second": 0.6,
238
  "step": 100
239
  },
240
  {
 
243
  "learning_rate": 4.798048466485017e-07,
244
  "logits/chosen": -1.1015625,
245
  "logits/rejected": -1.109375,
 
246
  "logps/chosen": -344.0,
247
+ "logps/chosen_bottom_tokens": -14.0,
248
+ "logps/chosen_top_tokens": -0.000835418701171875,
249
  "logps/rejected": -332.0,
250
+ "logps/rejected_bottom_tokens": -14.0,
251
+ "logps/rejected_top_tokens": -0.000873565673828125,
252
  "loss": 0.6804,
253
  "rewards/accuracies": 0.5899999737739563,
254
  "rewards/chosen": -0.037109375,
 
262
  "learning_rate": 4.725176028314541e-07,
263
  "logits/chosen": -1.109375,
264
  "logits/rejected": -1.1171875,
 
265
  "logps/chosen": -372.0,
266
+ "logps/chosen_bottom_tokens": -14.0,
267
+ "logps/chosen_top_tokens": -0.0008544921875,
268
  "logps/rejected": -354.0,
269
+ "logps/rejected_bottom_tokens": -14.0,
270
+ "logps/rejected_top_tokens": -0.0008544921875,
271
  "loss": 0.6745,
272
  "rewards/accuracies": 0.6399999856948853,
273
  "rewards/chosen": -0.03564453125,
 
281
  "learning_rate": 4.641838020498713e-07,
282
  "logits/chosen": -1.09375,
283
  "logits/rejected": -1.1640625,
 
284
  "logps/chosen": -408.0,
285
+ "logps/chosen_bottom_tokens": -14.1875,
286
+ "logps/chosen_top_tokens": -0.000934600830078125,
287
  "logps/rejected": -338.0,
288
+ "logps/rejected_bottom_tokens": -14.1875,
289
+ "logps/rejected_top_tokens": -0.00092315673828125,
290
  "loss": 0.6674,
291
  "rewards/accuracies": 0.5849999785423279,
292
  "rewards/chosen": -0.0703125,
 
300
  "learning_rate": 4.5484264029156733e-07,
301
  "logits/chosen": -1.1015625,
302
  "logits/rejected": -1.1484375,
 
303
  "logps/chosen": -386.0,
304
+ "logps/chosen_bottom_tokens": -14.125,
305
+ "logps/chosen_top_tokens": -0.000812530517578125,
306
  "logps/rejected": -336.0,
307
+ "logps/rejected_bottom_tokens": -14.125,
308
+ "logps/rejected_top_tokens": -0.00083160400390625,
309
  "loss": 0.6635,
310
  "rewards/accuracies": 0.5900000333786011,
311
  "rewards/chosen": -0.1015625,
 
319
  "learning_rate": 4.445380514196192e-07,
320
  "logits/chosen": -1.09375,
321
  "logits/rejected": -1.171875,
 
322
  "logps/chosen": -428.0,
323
+ "logps/chosen_bottom_tokens": -14.1875,
324
+ "logps/chosen_top_tokens": -0.00087738037109375,
325
  "logps/rejected": -356.0,
326
+ "logps/rejected_bottom_tokens": -14.1875,
327
+ "logps/rejected_top_tokens": -0.000858306884765625,
328
  "loss": 0.668,
329
  "rewards/accuracies": 0.6350000500679016,
330
  "rewards/chosen": -0.12890625,
 
338
  "learning_rate": 4.33318500540218e-07,
339
  "logits/chosen": -1.0859375,
340
  "logits/rejected": -1.1328125,
 
341
  "logps/chosen": -408.0,
342
+ "logps/chosen_bottom_tokens": -14.125,
343
+ "logps/chosen_top_tokens": -0.00089263916015625,
344
  "logps/rejected": -368.0,
345
+ "logps/rejected_bottom_tokens": -14.0625,
346
+ "logps/rejected_top_tokens": -0.000904083251953125,
347
  "loss": 0.6655,
348
  "rewards/accuracies": 0.6050000190734863,
349
  "rewards/chosen": -0.16796875,
 
357
  "learning_rate": 4.2123675605892985e-07,
358
  "logits/chosen": -1.078125,
359
  "logits/rejected": -1.1484375,
 
360
  "logps/chosen": -422.0,
361
+ "logps/chosen_bottom_tokens": -14.3125,
362
+ "logps/chosen_top_tokens": -0.00101470947265625,
363
  "logps/rejected": -364.0,
364
+ "logps/rejected_bottom_tokens": -14.25,
365
+ "logps/rejected_top_tokens": -0.00098419189453125,
366
  "loss": 0.6585,
367
  "rewards/accuracies": 0.6450000405311584,
368
  "rewards/chosen": -0.1826171875,
 
376
  "learning_rate": 4.0834964149744333e-07,
377
  "logits/chosen": -1.1015625,
378
  "logits/rejected": -1.15625,
 
379
  "logps/chosen": -416.0,
380
+ "logps/chosen_bottom_tokens": -14.25,
381
+ "logps/chosen_top_tokens": -0.00093841552734375,
382
  "logps/rejected": -380.0,
383
+ "logps/rejected_bottom_tokens": -14.1875,
384
+ "logps/rejected_top_tokens": -0.00099945068359375,
385
  "loss": 0.6643,
386
  "rewards/accuracies": 0.6100000143051147,
387
  "rewards/chosen": -0.232421875,
 
395
  "learning_rate": 3.947177682380738e-07,
396
  "logits/chosen": -1.1796875,
397
  "logits/rejected": -1.203125,
 
398
  "logps/chosen": -378.0,
399
+ "logps/chosen_bottom_tokens": -14.25,
400
+ "logps/chosen_top_tokens": -0.000789642333984375,
401
  "logps/rejected": -356.0,
402
+ "logps/rejected_bottom_tokens": -14.1875,
403
+ "logps/rejected_top_tokens": -0.000823974609375,
404
  "loss": 0.6499,
405
  "rewards/accuracies": 0.6450001001358032,
406
  "rewards/chosen": -0.2412109375,
 
414
  "learning_rate": 3.804052504529933e-07,
415
  "logits/chosen": -1.1796875,
416
  "logits/rejected": -1.2265625,
 
417
  "logps/chosen": -392.0,
418
+ "logps/chosen_bottom_tokens": -14.375,
419
+ "logps/chosen_top_tokens": -0.0008392333984375,
420
  "logps/rejected": -370.0,
421
+ "logps/rejected_bottom_tokens": -14.375,
422
+ "logps/rejected_top_tokens": -0.00087738037109375,
423
  "loss": 0.645,
424
  "rewards/accuracies": 0.6350000500679016,
425
  "rewards/chosen": -0.271484375,
 
431
  "epoch": 0.39254170755642787,
432
  "eval_logits/chosen": -1.15625,
433
  "eval_logits/rejected": -1.203125,
 
434
  "eval_logps/chosen": -422.0,
435
+ "eval_logps/chosen_bottom_tokens": -14.375,
436
+ "eval_logps/chosen_top_tokens": -0.000911712646484375,
437
  "eval_logps/rejected": -380.0,
438
+ "eval_logps/rejected_bottom_tokens": -14.3125,
439
+ "eval_logps/rejected_top_tokens": -0.000919342041015625,
440
  "eval_loss": 0.6488671898841858,
441
  "eval_rewards/accuracies": 0.6447761058807373,
442
  "eval_rewards/chosen": -0.287109375,
443
  "eval_rewards/margins": 0.13671875,
444
  "eval_rewards/rejected": -0.423828125,
445
+ "eval_runtime": 111.5112,
446
+ "eval_samples_per_second": 17.935,
447
+ "eval_steps_per_second": 0.601,
448
  "step": 200
449
  },
450
  {
 
453
  "learning_rate": 3.654794035589483e-07,
454
  "logits/chosen": -1.1328125,
455
  "logits/rejected": -1.1640625,
 
456
  "logps/chosen": -362.0,
457
+ "logps/chosen_bottom_tokens": -14.25,
458
+ "logps/chosen_top_tokens": -0.00091552734375,
459
  "logps/rejected": -344.0,
460
+ "logps/rejected_bottom_tokens": -14.25,
461
+ "logps/rejected_top_tokens": -0.000926971435546875,
462
  "loss": 0.6512,
463
  "rewards/accuracies": 0.6149999499320984,
464
  "rewards/chosen": -0.298828125,
 
472
  "learning_rate": 3.5001042761570826e-07,
473
  "logits/chosen": -1.171875,
474
  "logits/rejected": -1.2109375,
 
475
  "logps/chosen": -414.0,
476
+ "logps/chosen_bottom_tokens": -14.5,
477
+ "logps/chosen_top_tokens": -0.000762939453125,
478
  "logps/rejected": -398.0,
479
+ "logps/rejected_bottom_tokens": -14.375,
480
+ "logps/rejected_top_tokens": -0.000743865966796875,
481
  "loss": 0.6507,
482
  "rewards/accuracies": 0.5800000429153442,
483
  "rewards/chosen": -0.333984375,
 
491
  "learning_rate": 3.34071077157304e-07,
492
  "logits/chosen": -1.171875,
493
  "logits/rejected": -1.2265625,
 
494
  "logps/chosen": -388.0,
495
+ "logps/chosen_bottom_tokens": -14.375,
496
+ "logps/chosen_top_tokens": -0.00075531005859375,
497
  "logps/rejected": -354.0,
498
+ "logps/rejected_bottom_tokens": -14.3125,
499
+ "logps/rejected_top_tokens": -0.000827789306640625,
500
  "loss": 0.6464,
501
  "rewards/accuracies": 0.6299999952316284,
502
  "rewards/chosen": -0.353515625,
 
510
  "learning_rate": 3.1773631900892204e-07,
511
  "logits/chosen": -1.1484375,
512
  "logits/rejected": -1.1875,
 
513
  "logps/chosen": -416.0,
514
+ "logps/chosen_bottom_tokens": -14.375,
515
+ "logps/chosen_top_tokens": -0.000759124755859375,
516
  "logps/rejected": -396.0,
517
+ "logps/rejected_bottom_tokens": -14.375,
518
+ "logps/rejected_top_tokens": -0.0007781982421875,
519
  "loss": 0.6442,
520
  "rewards/accuracies": 0.6200000047683716,
521
  "rewards/chosen": -0.38671875,
 
529
  "learning_rate": 3.0108297969883103e-07,
530
  "logits/chosen": -1.1640625,
531
  "logits/rejected": -1.1953125,
 
532
  "logps/chosen": -426.0,
533
+ "logps/chosen_bottom_tokens": -14.4375,
534
+ "logps/chosen_top_tokens": -0.0008392333984375,
535
  "logps/rejected": -398.0,
536
+ "logps/rejected_bottom_tokens": -14.375,
537
+ "logps/rejected_top_tokens": -0.00081634521484375,
538
  "loss": 0.6347,
539
  "rewards/accuracies": 0.64000004529953,
540
  "rewards/chosen": -0.39453125,
 
548
  "learning_rate": 2.8418938412365013e-07,
549
  "logits/chosen": -1.1640625,
550
  "logits/rejected": -1.203125,
 
551
  "logps/chosen": -396.0,
552
+ "logps/chosen_bottom_tokens": -14.3125,
553
+ "logps/chosen_top_tokens": -0.000865936279296875,
554
  "logps/rejected": -372.0,
555
+ "logps/rejected_bottom_tokens": -14.1875,
556
+ "logps/rejected_top_tokens": -0.000885009765625,
557
  "loss": 0.6381,
558
  "rewards/accuracies": 0.6350000500679016,
559
  "rewards/chosen": -0.373046875,
 
567
  "learning_rate": 2.671349871664101e-07,
568
  "logits/chosen": -1.1640625,
569
  "logits/rejected": -1.171875,
 
570
  "logps/chosen": -398.0,
571
+ "logps/chosen_bottom_tokens": -14.3125,
572
+ "logps/chosen_top_tokens": -0.000782012939453125,
573
  "logps/rejected": -386.0,
574
+ "logps/rejected_bottom_tokens": -14.25,
575
+ "logps/rejected_top_tokens": -0.00077056884765625,
576
  "loss": 0.6315,
577
  "rewards/accuracies": 0.64000004529953,
578
  "rewards/chosen": -0.40625,
 
586
  "learning_rate": 2.5e-07,
587
  "logits/chosen": -1.171875,
588
  "logits/rejected": -1.1953125,
 
589
  "logps/chosen": -438.0,
590
+ "logps/chosen_bottom_tokens": -14.3125,
591
+ "logps/chosen_top_tokens": -0.000911712646484375,
592
  "logps/rejected": -402.0,
593
+ "logps/rejected_bottom_tokens": -14.25,
594
+ "logps/rejected_top_tokens": -0.0009002685546875,
595
  "loss": 0.6384,
596
  "rewards/accuracies": 0.6600000262260437,
597
  "rewards/chosen": -0.42578125,
 
605
  "learning_rate": 2.3286501283358982e-07,
606
  "logits/chosen": -1.1796875,
607
  "logits/rejected": -1.2421875,
 
608
  "logps/chosen": -412.0,
609
+ "logps/chosen_bottom_tokens": -14.3125,
610
+ "logps/chosen_top_tokens": -0.0008392333984375,
611
  "logps/rejected": -376.0,
612
+ "logps/rejected_bottom_tokens": -14.25,
613
+ "logps/rejected_top_tokens": -0.000885009765625,
614
  "loss": 0.632,
615
  "rewards/accuracies": 0.5750000476837158,
616
  "rewards/chosen": -0.4609375,
 
624
  "learning_rate": 2.1581061587634987e-07,
625
  "logits/chosen": -1.203125,
626
  "logits/rejected": -1.2421875,
 
627
  "logps/chosen": -428.0,
628
+ "logps/chosen_bottom_tokens": -14.3125,
629
+ "logps/chosen_top_tokens": -0.00075531005859375,
630
  "logps/rejected": -388.0,
631
+ "logps/rejected_bottom_tokens": -14.3125,
632
+ "logps/rejected_top_tokens": -0.00079345703125,
633
  "loss": 0.6396,
634
  "rewards/accuracies": 0.5999999642372131,
635
  "rewards/chosen": -0.482421875,
 
641
  "epoch": 0.5888125613346418,
642
  "eval_logits/chosen": -1.1875,
643
  "eval_logits/rejected": -1.234375,
 
644
  "eval_logps/chosen": -438.0,
645
+ "eval_logps/chosen_bottom_tokens": -14.375,
646
+ "eval_logps/chosen_top_tokens": -0.0007476806640625,
647
  "eval_logps/rejected": -406.0,
648
+ "eval_logps/rejected_bottom_tokens": -14.3125,
649
+ "eval_logps/rejected_top_tokens": -0.000759124755859375,
650
  "eval_loss": 0.6303857564926147,
651
  "eval_rewards/accuracies": 0.6626865863800049,
652
  "eval_rewards/chosen": -0.451171875,
653
  "eval_rewards/margins": 0.2275390625,
654
  "eval_rewards/rejected": -0.6796875,
655
+ "eval_runtime": 111.5027,
656
+ "eval_samples_per_second": 17.937,
657
+ "eval_steps_per_second": 0.601,
658
  "step": 300
659
  },
660
  {
 
663
  "learning_rate": 1.9891702030116897e-07,
664
  "logits/chosen": -1.140625,
665
  "logits/rejected": -1.2421875,
 
666
  "logps/chosen": -446.0,
667
+ "logps/chosen_bottom_tokens": -14.375,
668
+ "logps/chosen_top_tokens": -0.00074005126953125,
669
  "logps/rejected": -358.0,
670
+ "logps/rejected_bottom_tokens": -14.25,
671
+ "logps/rejected_top_tokens": -0.0007171630859375,
672
  "loss": 0.6234,
673
  "rewards/accuracies": 0.6949999928474426,
674
  "rewards/chosen": -0.408203125,
 
682
  "learning_rate": 1.8226368099107792e-07,
683
  "logits/chosen": -1.1640625,
684
  "logits/rejected": -1.2109375,
 
685
  "logps/chosen": -424.0,
686
+ "logps/chosen_bottom_tokens": -14.3125,
687
+ "logps/chosen_top_tokens": -0.00080108642578125,
688
  "logps/rejected": -364.0,
689
+ "logps/rejected_bottom_tokens": -14.25,
690
+ "logps/rejected_top_tokens": -0.000843048095703125,
691
  "loss": 0.6241,
692
  "rewards/accuracies": 0.675000011920929,
693
  "rewards/chosen": -0.42578125,
 
701
  "learning_rate": 1.6592892284269594e-07,
702
  "logits/chosen": -1.1796875,
703
  "logits/rejected": -1.2109375,
 
704
  "logps/chosen": -408.0,
705
+ "logps/chosen_bottom_tokens": -14.375,
706
+ "logps/chosen_top_tokens": -0.0007171630859375,
707
  "logps/rejected": -386.0,
708
+ "logps/rejected_bottom_tokens": -14.3125,
709
+ "logps/rejected_top_tokens": -0.000732421875,
710
  "loss": 0.6224,
711
  "rewards/accuracies": 0.6799999475479126,
712
  "rewards/chosen": -0.431640625,
 
720
  "learning_rate": 1.4998957238429172e-07,
721
  "logits/chosen": -1.21875,
722
  "logits/rejected": -1.2421875,
 
723
  "logps/chosen": -408.0,
724
+ "logps/chosen_bottom_tokens": -14.3125,
725
+ "logps/chosen_top_tokens": -0.000804901123046875,
726
  "logps/rejected": -380.0,
727
+ "logps/rejected_bottom_tokens": -14.3125,
728
+ "logps/rejected_top_tokens": -0.00077056884765625,
729
  "loss": 0.6204,
730
  "rewards/accuracies": 0.6300000548362732,
731
  "rewards/chosen": -0.5078125,
 
739
  "learning_rate": 1.345205964410517e-07,
740
  "logits/chosen": -1.1953125,
741
  "logits/rejected": -1.21875,
 
742
  "logps/chosen": -392.0,
743
+ "logps/chosen_bottom_tokens": -14.375,
744
+ "logps/chosen_top_tokens": -0.000858306884765625,
745
  "logps/rejected": -372.0,
746
+ "logps/rejected_bottom_tokens": -14.3125,
747
+ "logps/rejected_top_tokens": -0.00089263916015625,
748
  "loss": 0.627,
749
  "rewards/accuracies": 0.5850000381469727,
750
  "rewards/chosen": -0.49609375,
 
758
  "learning_rate": 1.1959474954700665e-07,
759
  "logits/chosen": -1.1796875,
760
  "logits/rejected": -1.21875,
 
761
  "logps/chosen": -424.0,
762
+ "logps/chosen_bottom_tokens": -14.375,
763
+ "logps/chosen_top_tokens": -0.00067138671875,
764
  "logps/rejected": -416.0,
765
+ "logps/rejected_bottom_tokens": -14.25,
766
+ "logps/rejected_top_tokens": -0.000675201416015625,
767
  "loss": 0.613,
768
  "rewards/accuracies": 0.7049999833106995,
769
  "rewards/chosen": -0.44140625,
 
777
  "learning_rate": 1.0528223176192615e-07,
778
  "logits/chosen": -1.1953125,
779
  "logits/rejected": -1.234375,
 
780
  "logps/chosen": -442.0,
781
+ "logps/chosen_bottom_tokens": -14.375,
782
+ "logps/chosen_top_tokens": -0.00069427490234375,
783
  "logps/rejected": -398.0,
784
+ "logps/rejected_bottom_tokens": -14.375,
785
+ "logps/rejected_top_tokens": -0.00067901611328125,
786
  "loss": 0.6218,
787
  "rewards/accuracies": 0.6250000596046448,
788
  "rewards/chosen": -0.5234375,
 
796
  "learning_rate": 9.16503585025567e-08,
797
  "logits/chosen": -1.1953125,
798
  "logits/rejected": -1.21875,
 
799
  "logps/chosen": -420.0,
800
+ "logps/chosen_bottom_tokens": -14.25,
801
+ "logps/chosen_top_tokens": -0.00090789794921875,
802
  "logps/rejected": -412.0,
803
+ "logps/rejected_bottom_tokens": -14.1875,
804
+ "logps/rejected_top_tokens": -0.00104522705078125,
805
  "loss": 0.6279,
806
  "rewards/accuracies": 0.6499999761581421,
807
  "rewards/chosen": -0.5,
 
815
  "learning_rate": 7.876324394107017e-08,
816
  "logits/chosen": -1.15625,
817
  "logits/rejected": -1.203125,
 
818
  "logps/chosen": -442.0,
819
+ "logps/chosen_bottom_tokens": -14.3125,
820
+ "logps/chosen_top_tokens": -0.00067901611328125,
821
  "logps/rejected": -418.0,
822
+ "logps/rejected_bottom_tokens": -14.25,
823
+ "logps/rejected_top_tokens": -0.00069427490234375,
824
  "loss": 0.6289,
825
  "rewards/accuracies": 0.6350000500679016,
826
  "rewards/chosen": -0.50390625,
 
834
  "learning_rate": 6.668149945978201e-08,
835
  "logits/chosen": -1.1953125,
836
  "logits/rejected": -1.2265625,
 
837
  "logps/chosen": -440.0,
838
+ "logps/chosen_bottom_tokens": -14.3125,
839
+ "logps/chosen_top_tokens": -0.00070953369140625,
840
  "logps/rejected": -420.0,
841
+ "logps/rejected_bottom_tokens": -14.25,
842
+ "logps/rejected_top_tokens": -0.000732421875,
843
  "loss": 0.6102,
844
  "rewards/accuracies": 0.6700000166893005,
845
  "rewards/chosen": -0.482421875,
 
851
  "epoch": 0.7850834151128557,
852
  "eval_logits/chosen": -1.1875,
853
  "eval_logits/rejected": -1.234375,
 
854
  "eval_logps/chosen": -444.0,
855
+ "eval_logps/chosen_bottom_tokens": -14.3125,
856
+ "eval_logps/chosen_top_tokens": -0.00067138671875,
857
  "eval_logps/rejected": -414.0,
858
+ "eval_logps/rejected_bottom_tokens": -14.25,
859
+ "eval_logps/rejected_top_tokens": -0.00066375732421875,
860
  "eval_loss": 0.6267920136451721,
861
  "eval_rewards/accuracies": 0.6567164063453674,
862
  "eval_rewards/chosen": -0.50390625,
863
  "eval_rewards/margins": 0.2578125,
864
  "eval_rewards/rejected": -0.76171875,
865
+ "eval_runtime": 111.5791,
866
+ "eval_samples_per_second": 17.925,
867
+ "eval_steps_per_second": 0.6,
868
  "step": 400
869
  },
870
  {
 
873
  "learning_rate": 5.546194858038072e-08,
874
  "logits/chosen": -1.171875,
875
  "logits/rejected": -1.21875,
 
876
  "logps/chosen": -416.0,
877
+ "logps/chosen_bottom_tokens": -14.3125,
878
+ "logps/chosen_top_tokens": -0.000766754150390625,
879
  "logps/rejected": -374.0,
880
+ "logps/rejected_bottom_tokens": -14.25,
881
+ "logps/rejected_top_tokens": -0.000827789306640625,
882
  "loss": 0.6227,
883
  "rewards/accuracies": 0.6699999570846558,
884
  "rewards/chosen": -0.51171875,
 
892
  "learning_rate": 4.5157359708432626e-08,
893
  "logits/chosen": -1.1953125,
894
  "logits/rejected": -1.2265625,
 
895
  "logps/chosen": -394.0,
896
+ "logps/chosen_bottom_tokens": -14.25,
897
+ "logps/chosen_top_tokens": -0.00075531005859375,
898
  "logps/rejected": -412.0,
899
+ "logps/rejected_bottom_tokens": -14.1875,
900
+ "logps/rejected_top_tokens": -0.0007476806640625,
901
  "loss": 0.6205,
902
  "rewards/accuracies": 0.625,
903
  "rewards/chosen": -0.474609375,
 
911
  "learning_rate": 3.581619795012874e-08,
912
  "logits/chosen": -1.1796875,
913
  "logits/rejected": -1.1875,
 
914
  "logps/chosen": -400.0,
915
+ "logps/chosen_bottom_tokens": -14.1875,
916
+ "logps/chosen_top_tokens": -0.000782012939453125,
917
  "logps/rejected": -404.0,
918
+ "logps/rejected_bottom_tokens": -14.1875,
919
+ "logps/rejected_top_tokens": -0.000743865966796875,
920
  "loss": 0.6208,
921
  "rewards/accuracies": 0.675000011920929,
922
  "rewards/chosen": -0.474609375,
 
930
  "learning_rate": 2.748239716854589e-08,
931
  "logits/chosen": -1.2109375,
932
  "logits/rejected": -1.1953125,
 
933
  "logps/chosen": -424.0,
934
+ "logps/chosen_bottom_tokens": -14.1875,
935
+ "logps/chosen_top_tokens": -0.000728607177734375,
936
  "logps/rejected": -420.0,
937
+ "logps/rejected_bottom_tokens": -14.25,
938
+ "logps/rejected_top_tokens": -0.000751495361328125,
939
  "loss": 0.6398,
940
  "rewards/accuracies": 0.5849999785423279,
941
  "rewards/chosen": -0.51171875,
 
949
  "learning_rate": 2.0195153351498323e-08,
950
  "logits/chosen": -1.1796875,
951
  "logits/rejected": -1.2109375,
 
952
  "logps/chosen": -432.0,
953
+ "logps/chosen_bottom_tokens": -14.375,
954
+ "logps/chosen_top_tokens": -0.000732421875,
955
  "logps/rejected": -420.0,
956
+ "logps/rejected_bottom_tokens": -14.3125,
957
+ "logps/rejected_top_tokens": -0.000705718994140625,
958
  "loss": 0.611,
959
  "rewards/accuracies": 0.6149999499320984,
960
  "rewards/chosen": -0.53125,
 
968
  "learning_rate": 1.3988740262822846e-08,
969
  "logits/chosen": -1.1953125,
970
  "logits/rejected": -1.203125,
 
971
  "logps/chosen": -428.0,
972
+ "logps/chosen_bottom_tokens": -14.375,
973
+ "logps/chosen_top_tokens": -0.00067138671875,
974
  "logps/rejected": -410.0,
975
+ "logps/rejected_bottom_tokens": -14.3125,
976
+ "logps/rejected_top_tokens": -0.000667572021484375,
977
  "loss": 0.6138,
978
  "rewards/accuracies": 0.6349999904632568,
979
  "rewards/chosen": -0.490234375,
 
987
  "learning_rate": 8.892348244137788e-09,
988
  "logits/chosen": -1.1875,
989
  "logits/rejected": -1.2421875,
 
990
  "logps/chosen": -474.0,
991
+ "logps/chosen_bottom_tokens": -14.5625,
992
+ "logps/chosen_top_tokens": -0.000675201416015625,
993
  "logps/rejected": -444.0,
994
+ "logps/rejected_bottom_tokens": -14.375,
995
+ "logps/rejected_top_tokens": -0.000652313232421875,
996
  "loss": 0.6106,
997
  "rewards/accuracies": 0.6299999952316284,
998
  "rewards/chosen": -0.494140625,
 
1006
  "learning_rate": 4.929946925231076e-09,
1007
  "logits/chosen": -1.1328125,
1008
  "logits/rejected": -1.171875,
 
1009
  "logps/chosen": -410.0,
1010
+ "logps/chosen_bottom_tokens": -14.375,
1011
+ "logps/chosen_top_tokens": -0.0007476806640625,
1012
  "logps/rejected": -412.0,
1013
+ "logps/rejected_bottom_tokens": -14.25,
1014
+ "logps/rejected_top_tokens": -0.00095367431640625,
1015
  "loss": 0.6203,
1016
  "rewards/accuracies": 0.6049999594688416,
1017
  "rewards/chosen": -0.48046875,
 
1025
  "learning_rate": 2.1201724887858484e-09,
1026
  "logits/chosen": -1.1640625,
1027
  "logits/rejected": -1.171875,
 
1028
  "logps/chosen": -422.0,
1029
+ "logps/chosen_bottom_tokens": -14.25,
1030
+ "logps/chosen_top_tokens": -0.00072479248046875,
1031
  "logps/rejected": -412.0,
1032
+ "logps/rejected_bottom_tokens": -14.1875,
1033
+ "logps/rejected_top_tokens": -0.000743865966796875,
1034
  "loss": 0.6235,
1035
  "rewards/accuracies": 0.5949999690055847,
1036
  "rewards/chosen": -0.5390625,
 
1044
  "learning_rate": 4.762400196664518e-10,
1045
  "logits/chosen": -1.1484375,
1046
  "logits/rejected": -1.1953125,
 
1047
  "logps/chosen": -428.0,
1048
+ "logps/chosen_bottom_tokens": -14.375,
1049
+ "logps/chosen_top_tokens": -0.000621795654296875,
1050
  "logps/rejected": -388.0,
1051
+ "logps/rejected_bottom_tokens": -14.3125,
1052
+ "logps/rejected_top_tokens": -0.0006256103515625,
1053
  "loss": 0.6084,
1054
  "rewards/accuracies": 0.625,
1055
  "rewards/chosen": -0.578125,
 
1061
  "epoch": 0.9813542688910697,
1062
  "eval_logits/chosen": -1.1953125,
1063
  "eval_logits/rejected": -1.2421875,
 
1064
  "eval_logps/chosen": -446.0,
1065
+ "eval_logps/chosen_bottom_tokens": -14.375,
1066
+ "eval_logps/chosen_top_tokens": -0.000743865966796875,
1067
  "eval_logps/rejected": -416.0,
1068
+ "eval_logps/rejected_bottom_tokens": -14.3125,
1069
+ "eval_logps/rejected_top_tokens": -0.0007476806640625,
1070
  "eval_loss": 0.6259472370147705,
1071
  "eval_rewards/accuracies": 0.6567164659500122,
1072
  "eval_rewards/chosen": -0.5234375,
1073
  "eval_rewards/margins": 0.26171875,
1074
  "eval_rewards/rejected": -0.78515625,
1075
+ "eval_runtime": 111.4505,
1076
+ "eval_samples_per_second": 17.945,
1077
+ "eval_steps_per_second": 0.601,
1078
  "step": 500
1079
  },
1080
  {
1081
  "epoch": 0.9990186457311089,
1082
  "step": 509,
1083
  "total_flos": 0.0,
1084
+ "train_loss": 0.6464882252961105,
1085
+ "train_runtime": 8284.9703,
1086
+ "train_samples_per_second": 7.379,
1087
+ "train_steps_per_second": 0.061
1088
  }
1089
  ],
1090
  "logging_steps": 10,