wzhouad commited on
Commit
742620d
1 Parent(s): 6a5fd01

Model save

Browse files
README.md CHANGED
@@ -16,6 +16,16 @@ should probably proofread and complete it, then remove this comment. -->
16
  # zephyr-7b-dpo-full
17
 
18
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
 
 
 
 
 
 
 
 
 
 
19
 
20
  ## Model description
21
 
@@ -37,7 +47,7 @@ The following hyperparameters were used during training:
37
  - learning_rate: 5e-07
38
  - train_batch_size: 8
39
  - eval_batch_size: 8
40
- - seed: 3
41
  - distributed_type: multi-GPU
42
  - num_devices: 8
43
  - gradient_accumulation_steps: 2
@@ -50,6 +60,16 @@ The following hyperparameters were used during training:
50
 
51
  ### Training results
52
 
 
 
 
 
 
 
 
 
 
 
53
 
54
 
55
  ### Framework versions
 
16
  # zephyr-7b-dpo-full
17
 
18
  This model is a fine-tuned version of [HuggingFaceH4/mistral-7b-sft-beta](https://huggingface.co/HuggingFaceH4/mistral-7b-sft-beta) on the None dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.0870
21
+ - Rewards/chosen: -2.7029
22
+ - Rewards/rejected: -3.6206
23
+ - Rewards/accuracies: 0.7461
24
+ - Rewards/margins: 0.9178
25
+ - Logps/rejected: -619.4178
26
+ - Logps/chosen: -527.3273
27
+ - Logits/rejected: -1.9016
28
+ - Logits/chosen: -1.9549
29
 
30
  ## Model description
31
 
 
47
  - learning_rate: 5e-07
48
  - train_batch_size: 8
49
  - eval_batch_size: 8
50
+ - seed: 1
51
  - distributed_type: multi-GPU
52
  - num_devices: 8
53
  - gradient_accumulation_steps: 2
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
+ | 0.2588 | 0.11 | 100 | 0.2530 | -0.6480 | -0.9151 | 0.6406 | 0.2671 | -348.8655 | -321.8432 | -2.5122 | -2.5274 |
66
+ | 0.1405 | 0.23 | 200 | 0.1423 | -1.6364 | -2.1668 | 0.7070 | 0.5303 | -474.0294 | -420.6826 | -2.1132 | -2.1455 |
67
+ | 0.0841 | 0.34 | 300 | 0.1030 | -2.2868 | -3.0540 | 0.7383 | 0.7672 | -562.7563 | -485.7206 | -1.9654 | -2.0047 |
68
+ | 0.0916 | 0.45 | 400 | 0.1080 | -2.2956 | -3.0938 | 0.7344 | 0.7982 | -566.7339 | -486.5965 | -1.9394 | -1.9812 |
69
+ | 0.0864 | 0.57 | 500 | 0.0956 | -2.3099 | -3.1316 | 0.7461 | 0.8217 | -570.5160 | -488.0345 | -1.9095 | -1.9560 |
70
+ | 0.065 | 0.68 | 600 | 0.0849 | -2.8564 | -3.7576 | 0.7266 | 0.9012 | -633.1135 | -542.6826 | -1.8868 | -1.9405 |
71
+ | 0.0663 | 0.79 | 700 | 0.0840 | -2.8127 | -3.7340 | 0.7383 | 0.9213 | -630.7556 | -538.3111 | -1.9100 | -1.9627 |
72
+ | 0.0663 | 0.91 | 800 | 0.0870 | -2.7029 | -3.6206 | 0.7461 | 0.9178 | -619.4178 | -527.3273 | -1.9016 | -1.9549 |
73
 
74
 
75
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.13438091388455145,
4
- "train_runtime": 3218.6044,
5
- "train_samples": 51894,
6
- "train_samples_per_second": 16.123,
7
- "train_steps_per_second": 0.126
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.11349766382510908,
4
+ "train_runtime": 8005.048,
5
+ "train_samples": 113028,
6
+ "train_samples_per_second": 14.12,
7
+ "train_steps_per_second": 0.11
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb8362a83dbb810604c9bafec85943233773008800e5be078eda7676e6a5c17d
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0100db022e508c7165c36f69d0b9b8bb891630b5098773fb118894879c59a83
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d89dd4954ae3d1cf6d18c20fe5c41eace0d5e2cdd13d0e454a7ba4e26f5a2774
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1b514fc11019e3edb6963595ffc32c3f463b576c83ad6e179c0fb4d3ecdc9c7
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3564f1a5524b5df57acdcc790c6cb97db2f4bf1e30dcaacd947fb7902ebd95fa
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a87d655aa114c81d612aa0a478ca3c21caf414fcf7afc6ba84774275794d4706
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.13438091388455145,
4
- "train_runtime": 3218.6044,
5
- "train_samples": 51894,
6
- "train_samples_per_second": 16.123,
7
- "train_steps_per_second": 0.126
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.11349766382510908,
4
+ "train_runtime": 8005.048,
5
+ "train_samples": 113028,
6
+ "train_samples_per_second": 14.12,
7
+ "train_steps_per_second": 0.11
8
  }
trainer_state.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.998766954377312,
5
- "eval_steps": 1000,
6
- "global_step": 405,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 1.2195121951219512e-08,
14
- "logits/chosen": -2.8088459968566895,
15
- "logits/rejected": -2.7595884799957275,
16
- "logps/chosen": -368.90777587890625,
17
- "logps/rejected": -133.10202026367188,
18
- "loss": 0.4545,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
@@ -23,579 +23,1379 @@
23
  "step": 1
24
  },
25
  {
26
- "epoch": 0.02,
27
- "learning_rate": 1.219512195121951e-07,
28
- "logits/chosen": -2.83878231048584,
29
- "logits/rejected": -2.824958562850952,
30
- "logps/chosen": -433.8194580078125,
31
- "logps/rejected": -114.66372680664062,
32
- "loss": 0.4227,
33
- "rewards/accuracies": 0.5416666865348816,
34
- "rewards/chosen": 0.0010460919002071023,
35
- "rewards/margins": 0.0013696590904146433,
36
- "rewards/rejected": -0.000323567190207541,
37
  "step": 10
38
  },
39
  {
40
- "epoch": 0.05,
41
- "learning_rate": 2.439024390243902e-07,
42
- "logits/chosen": -2.7982840538024902,
43
- "logits/rejected": -2.7652382850646973,
44
- "logps/chosen": -436.67694091796875,
45
- "logps/rejected": -109.33970642089844,
46
- "loss": 0.4254,
47
- "rewards/accuracies": 0.7562500238418579,
48
- "rewards/chosen": 0.020646633580327034,
49
- "rewards/margins": 0.03670011833310127,
50
- "rewards/rejected": -0.01605348475277424,
51
  "step": 20
52
  },
53
  {
54
- "epoch": 0.07,
55
- "learning_rate": 3.6585365853658536e-07,
56
- "logits/chosen": -2.717103958129883,
57
- "logits/rejected": -2.6900384426116943,
58
- "logps/chosen": -422.26702880859375,
59
- "logps/rejected": -128.1683349609375,
60
- "loss": 0.4108,
61
- "rewards/accuracies": 0.7749999761581421,
62
- "rewards/chosen": 0.07094015926122665,
63
- "rewards/margins": 0.20011821389198303,
64
- "rewards/rejected": -0.12917804718017578,
65
  "step": 30
66
  },
67
  {
68
- "epoch": 0.1,
69
- "learning_rate": 4.878048780487804e-07,
70
- "logits/chosen": -2.590641498565674,
71
- "logits/rejected": -2.5721707344055176,
72
- "logps/chosen": -396.3973693847656,
73
- "logps/rejected": -139.94859313964844,
74
- "loss": 0.3881,
75
- "rewards/accuracies": 0.762499988079071,
76
- "rewards/chosen": 0.022975314408540726,
77
- "rewards/margins": 0.4287249445915222,
78
- "rewards/rejected": -0.4057496190071106,
79
  "step": 40
80
  },
81
  {
82
- "epoch": 0.12,
83
- "learning_rate": 4.992461696250783e-07,
84
- "logits/chosen": -2.42146635055542,
85
- "logits/rejected": -2.394202709197998,
86
- "logps/chosen": -445.91644287109375,
87
- "logps/rejected": -205.5404052734375,
88
- "loss": 0.316,
89
- "rewards/accuracies": 0.800000011920929,
90
- "rewards/chosen": -0.03603144362568855,
91
- "rewards/margins": 0.8689195513725281,
92
- "rewards/rejected": -0.9049509763717651,
93
  "step": 50
94
  },
95
  {
96
- "epoch": 0.15,
97
- "learning_rate": 4.966461721767899e-07,
98
- "logits/chosen": -2.417520046234131,
99
- "logits/rejected": -2.3663182258605957,
100
- "logps/chosen": -422.27215576171875,
101
- "logps/rejected": -255.75912475585938,
102
- "loss": 0.2661,
103
- "rewards/accuracies": 0.7437499761581421,
104
- "rewards/chosen": -0.35185474157333374,
105
- "rewards/margins": 0.9545990228652954,
106
- "rewards/rejected": -1.3064535856246948,
107
  "step": 60
108
  },
109
  {
110
- "epoch": 0.17,
111
- "learning_rate": 4.922100518015975e-07,
112
- "logits/chosen": -2.45034122467041,
113
- "logits/rejected": -2.397273540496826,
114
- "logps/chosen": -428.19207763671875,
115
- "logps/rejected": -294.82501220703125,
116
- "loss": 0.198,
117
- "rewards/accuracies": 0.7875000238418579,
118
- "rewards/chosen": -0.4432826638221741,
119
- "rewards/margins": 1.3181250095367432,
120
- "rewards/rejected": -1.7614076137542725,
121
  "step": 70
122
  },
123
  {
124
- "epoch": 0.2,
125
- "learning_rate": 4.859708325770919e-07,
126
- "logits/chosen": -2.3751111030578613,
127
- "logits/rejected": -2.321465015411377,
128
- "logps/chosen": -468.4130859375,
129
- "logps/rejected": -331.666259765625,
130
- "loss": 0.162,
131
- "rewards/accuracies": 0.793749988079071,
132
- "rewards/chosen": -0.7735603451728821,
133
- "rewards/margins": 1.4863694906234741,
134
- "rewards/rejected": -2.25993013381958,
135
  "step": 80
136
  },
137
  {
138
- "epoch": 0.22,
139
- "learning_rate": 4.779749614980225e-07,
140
- "logits/chosen": -2.381338596343994,
141
- "logits/rejected": -2.327340602874756,
142
- "logps/chosen": -520.724365234375,
143
- "logps/rejected": -380.0218811035156,
144
- "loss": 0.1464,
145
- "rewards/accuracies": 0.8812500238418579,
146
- "rewards/chosen": -0.4466208815574646,
147
- "rewards/margins": 2.054797649383545,
148
- "rewards/rejected": -2.5014188289642334,
149
  "step": 90
150
  },
151
  {
152
- "epoch": 0.25,
153
- "learning_rate": 4.682819627081427e-07,
154
- "logits/chosen": -2.3299832344055176,
155
- "logits/rejected": -2.2486767768859863,
156
- "logps/chosen": -477.24261474609375,
157
- "logps/rejected": -372.49017333984375,
158
- "loss": 0.1456,
159
- "rewards/accuracies": 0.862500011920929,
160
- "rewards/chosen": -0.6445478200912476,
161
- "rewards/margins": 1.895777702331543,
162
- "rewards/rejected": -2.54032564163208,
163
  "step": 100
164
  },
165
  {
166
- "epoch": 0.27,
167
- "learning_rate": 4.569639943810477e-07,
168
- "logits/chosen": -2.3097102642059326,
169
- "logits/rejected": -2.226323127746582,
170
- "logps/chosen": -495.50469970703125,
171
- "logps/rejected": -389.80078125,
172
- "loss": 0.1283,
173
- "rewards/accuracies": 0.78125,
174
- "rewards/chosen": -0.8834150433540344,
175
- "rewards/margins": 1.8450326919555664,
176
- "rewards/rejected": -2.728447675704956,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  "step": 110
178
  },
179
  {
180
- "epoch": 0.3,
181
- "learning_rate": 4.4410531154874543e-07,
182
- "logits/chosen": -2.3541078567504883,
183
- "logits/rejected": -2.2549960613250732,
184
- "logps/chosen": -524.7901000976562,
185
- "logps/rejected": -398.75775146484375,
186
- "loss": 0.1283,
187
- "rewards/accuracies": 0.78125,
188
- "rewards/chosen": -0.7295175790786743,
189
- "rewards/margins": 1.9527451992034912,
190
- "rewards/rejected": -2.682262897491455,
191
  "step": 120
192
  },
193
  {
194
- "epoch": 0.32,
195
- "learning_rate": 4.298016388768561e-07,
196
- "logits/chosen": -2.3804497718811035,
197
- "logits/rejected": -2.2821872234344482,
198
- "logps/chosen": -518.573974609375,
199
- "logps/rejected": -398.14306640625,
200
- "loss": 0.114,
201
- "rewards/accuracies": 0.84375,
202
- "rewards/chosen": -0.5337150692939758,
203
- "rewards/margins": 2.2159152030944824,
204
- "rewards/rejected": -2.7496302127838135,
205
  "step": 130
206
  },
207
  {
208
- "epoch": 0.35,
209
- "learning_rate": 4.1415945805573005e-07,
210
- "logits/chosen": -2.309293270111084,
211
- "logits/rejected": -2.2271227836608887,
212
- "logps/chosen": -486.838623046875,
213
- "logps/rejected": -373.0490417480469,
214
- "loss": 0.1246,
215
- "rewards/accuracies": 0.862500011920929,
216
- "rewards/chosen": -0.6586702466011047,
217
- "rewards/margins": 1.7459022998809814,
218
- "rewards/rejected": -2.4045722484588623,
219
  "step": 140
220
  },
221
  {
222
- "epoch": 0.37,
223
- "learning_rate": 3.972952151123984e-07,
224
- "logits/chosen": -2.288892984390259,
225
- "logits/rejected": -2.1915061473846436,
226
- "logps/chosen": -450.01556396484375,
227
- "logps/rejected": -368.2213439941406,
228
- "loss": 0.1152,
229
- "rewards/accuracies": 0.856249988079071,
230
- "rewards/chosen": -0.7529748678207397,
231
- "rewards/margins": 1.9199845790863037,
232
- "rewards/rejected": -2.672959566116333,
233
  "step": 150
234
  },
235
  {
236
- "epoch": 0.39,
237
- "learning_rate": 3.793344535444142e-07,
238
- "logits/chosen": -2.2575857639312744,
239
- "logits/rejected": -2.1550350189208984,
240
- "logps/chosen": -547.2183837890625,
241
- "logps/rejected": -409.57989501953125,
242
- "loss": 0.088,
243
- "rewards/accuracies": 0.8125,
244
- "rewards/chosen": -0.8003584146499634,
245
- "rewards/margins": 2.1838386058807373,
246
- "rewards/rejected": -2.9841971397399902,
247
  "step": 160
248
  },
249
  {
250
- "epoch": 0.42,
251
- "learning_rate": 3.604108797288461e-07,
252
- "logits/chosen": -2.2742323875427246,
253
- "logits/rejected": -2.167198419570923,
254
- "logps/chosen": -547.2274169921875,
255
- "logps/rejected": -456.614501953125,
256
- "loss": 0.0776,
257
- "rewards/accuracies": 0.862500011920929,
258
- "rewards/chosen": -1.0825190544128418,
259
- "rewards/margins": 2.3789236545562744,
260
- "rewards/rejected": -3.4614429473876953,
261
  "step": 170
262
  },
263
  {
264
- "epoch": 0.44,
265
- "learning_rate": 3.40665367563858e-07,
266
- "logits/chosen": -2.2402544021606445,
267
- "logits/rejected": -2.1346538066864014,
268
- "logps/chosen": -564.0145263671875,
269
- "logps/rejected": -489.21160888671875,
270
- "loss": 0.0697,
271
- "rewards/accuracies": 0.7749999761581421,
272
- "rewards/chosen": -1.539156198501587,
273
- "rewards/margins": 2.1975486278533936,
274
- "rewards/rejected": -3.7367050647735596,
275
  "step": 180
276
  },
277
  {
278
- "epoch": 0.47,
279
- "learning_rate": 3.202449097526798e-07,
280
- "logits/chosen": -2.3025131225585938,
281
- "logits/rejected": -2.224256992340088,
282
- "logps/chosen": -505.39520263671875,
283
- "logps/rejected": -423.83026123046875,
284
- "loss": 0.0811,
285
- "rewards/accuracies": 0.831250011920929,
286
- "rewards/chosen": -1.0325360298156738,
287
- "rewards/margins": 2.132319927215576,
288
- "rewards/rejected": -3.16485595703125,
289
  "step": 190
290
  },
291
  {
292
- "epoch": 0.49,
293
- "learning_rate": 2.993015235369905e-07,
294
- "logits/chosen": -2.3023552894592285,
295
- "logits/rejected": -2.2043874263763428,
296
- "logps/chosen": -525.6875610351562,
297
- "logps/rejected": -416.1629333496094,
298
- "loss": 0.0979,
299
- "rewards/accuracies": 0.856249988079071,
300
- "rewards/chosen": -0.8641435503959656,
301
- "rewards/margins": 2.118994951248169,
302
- "rewards/rejected": -2.9831383228302,
303
  "step": 200
304
  },
305
  {
306
- "epoch": 0.52,
307
- "learning_rate": 2.7799111902582693e-07,
308
- "logits/chosen": -2.3067820072174072,
309
- "logits/rejected": -2.2110161781311035,
310
- "logps/chosen": -492.69927978515625,
311
- "logps/rejected": -381.31878662109375,
312
- "loss": 0.0872,
313
- "rewards/accuracies": 0.793749988079071,
314
- "rewards/chosen": -0.9221334457397461,
315
- "rewards/margins": 1.870031714439392,
316
- "rewards/rejected": -2.7921650409698486,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  "step": 210
318
  },
319
  {
320
- "epoch": 0.54,
321
- "learning_rate": 2.564723385445869e-07,
322
- "logits/chosen": -2.3406167030334473,
323
- "logits/rejected": -2.2510488033294678,
324
- "logps/chosen": -520.8443603515625,
325
- "logps/rejected": -442.00732421875,
326
- "loss": 0.0908,
327
- "rewards/accuracies": 0.8062499761581421,
328
- "rewards/chosen": -1.0322405099868774,
329
- "rewards/margins": 2.144731044769287,
330
- "rewards/rejected": -3.176971912384033,
331
  "step": 220
332
  },
333
  {
334
- "epoch": 0.57,
335
- "learning_rate": 2.3490537564442845e-07,
336
- "logits/chosen": -2.284823179244995,
337
- "logits/rejected": -2.1653401851654053,
338
- "logps/chosen": -511.96929931640625,
339
- "logps/rejected": -426.46356201171875,
340
- "loss": 0.0967,
341
- "rewards/accuracies": 0.793749988079071,
342
- "rewards/chosen": -1.2002372741699219,
343
- "rewards/margins": 1.9990075826644897,
344
- "rewards/rejected": -3.199244976043701,
345
  "step": 230
346
  },
347
  {
348
- "epoch": 0.59,
349
- "learning_rate": 2.1345078256378801e-07,
350
- "logits/chosen": -2.321927547454834,
351
- "logits/rejected": -2.215357780456543,
352
- "logps/chosen": -495.8760681152344,
353
- "logps/rejected": -439.46282958984375,
354
- "loss": 0.0955,
355
- "rewards/accuracies": 0.862500011920929,
356
- "rewards/chosen": -0.8706371188163757,
357
- "rewards/margins": 2.3429722785949707,
358
- "rewards/rejected": -3.213609218597412,
359
  "step": 240
360
  },
361
  {
362
- "epoch": 0.62,
363
- "learning_rate": 1.9226827501969865e-07,
364
- "logits/chosen": -2.3428966999053955,
365
- "logits/rejected": -2.2573530673980713,
366
- "logps/chosen": -526.4675903320312,
367
- "logps/rejected": -451.949462890625,
368
- "loss": 0.096,
369
- "rewards/accuracies": 0.8374999761581421,
370
- "rewards/chosen": -0.8379364013671875,
371
- "rewards/margins": 2.499549627304077,
372
- "rewards/rejected": -3.3374857902526855,
373
  "step": 250
374
  },
375
  {
376
- "epoch": 0.64,
377
- "learning_rate": 1.715155432264775e-07,
378
- "logits/chosen": -2.3556008338928223,
379
- "logits/rejected": -2.2766494750976562,
380
- "logps/chosen": -516.3786010742188,
381
- "logps/rejected": -430.13916015625,
382
- "loss": 0.0857,
383
- "rewards/accuracies": 0.875,
384
- "rewards/chosen": -0.8434340357780457,
385
- "rewards/margins": 2.294442653656006,
386
- "rewards/rejected": -3.1378769874572754,
387
  "step": 260
388
  },
389
  {
390
- "epoch": 0.67,
391
- "learning_rate": 1.51347077992983e-07,
392
- "logits/chosen": -2.3460044860839844,
393
- "logits/rejected": -2.281031370162964,
394
- "logps/chosen": -490.55078125,
395
- "logps/rejected": -423.6560974121094,
396
- "loss": 0.0821,
397
- "rewards/accuracies": 0.856249988079071,
398
- "rewards/chosen": -0.8685197830200195,
399
- "rewards/margins": 2.1445822715759277,
400
- "rewards/rejected": -3.0131022930145264,
401
  "step": 270
402
  },
403
  {
404
- "epoch": 0.69,
405
- "learning_rate": 1.3191302063739906e-07,
406
- "logits/chosen": -2.2882773876190186,
407
- "logits/rejected": -2.218071699142456,
408
- "logps/chosen": -500.769287109375,
409
- "logps/rejected": -446.246826171875,
410
- "loss": 0.0712,
411
- "rewards/accuracies": 0.8187500238418579,
412
- "rewards/chosen": -1.2157343626022339,
413
- "rewards/margins": 2.1158077716827393,
414
- "rewards/rejected": -3.3315422534942627,
415
  "step": 280
416
  },
417
  {
418
- "epoch": 0.72,
419
- "learning_rate": 1.1335804528119475e-07,
420
- "logits/chosen": -2.3649039268493652,
421
- "logits/rejected": -2.252676486968994,
422
- "logps/chosen": -540.1212158203125,
423
- "logps/rejected": -467.2939453125,
424
- "loss": 0.0686,
425
- "rewards/accuracies": 0.8687499761581421,
426
- "rewards/chosen": -1.0436217784881592,
427
- "rewards/margins": 2.6221861839294434,
428
- "rewards/rejected": -3.6658082008361816,
429
  "step": 290
430
  },
431
  {
432
- "epoch": 0.74,
433
- "learning_rate": 9.582028184286423e-08,
434
- "logits/chosen": -2.243900775909424,
435
- "logits/rejected": -2.1746292114257812,
436
- "logps/chosen": -503.1402282714844,
437
- "logps/rejected": -486.1592712402344,
438
- "loss": 0.0686,
439
- "rewards/accuracies": 0.84375,
440
- "rewards/chosen": -1.4328491687774658,
441
- "rewards/margins": 2.2128751277923584,
442
- "rewards/rejected": -3.6457245349884033,
443
  "step": 300
444
  },
445
  {
446
- "epoch": 0.76,
447
- "learning_rate": 7.943028774907065e-08,
448
- "logits/chosen": -2.2528328895568848,
449
- "logits/rejected": -2.170386791229248,
450
- "logps/chosen": -501.7100524902344,
451
- "logps/rejected": -471.88897705078125,
452
- "loss": 0.0689,
453
- "rewards/accuracies": 0.8500000238418579,
454
- "rewards/chosen": -1.172387719154358,
455
- "rewards/margins": 2.3613522052764893,
456
- "rewards/rejected": -3.533740282058716,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  "step": 310
458
  },
459
  {
460
- "epoch": 0.79,
461
- "learning_rate": 6.431007601814637e-08,
462
- "logits/chosen": -2.258288860321045,
463
- "logits/rejected": -2.1915061473846436,
464
- "logps/chosen": -471.57330322265625,
465
- "logps/rejected": -461.84417724609375,
466
- "loss": 0.0601,
467
- "rewards/accuracies": 0.8187500238418579,
468
- "rewards/chosen": -1.4386770725250244,
469
- "rewards/margins": 2.1069023609161377,
470
- "rewards/rejected": -3.545579433441162,
471
  "step": 320
472
  },
473
  {
474
- "epoch": 0.81,
475
- "learning_rate": 5.0572206951246e-08,
476
- "logits/chosen": -2.2368595600128174,
477
- "logits/rejected": -2.1402342319488525,
478
- "logps/chosen": -522.8599853515625,
479
- "logps/rejected": -482.84893798828125,
480
- "loss": 0.0626,
481
- "rewards/accuracies": 0.78125,
482
- "rewards/chosen": -1.5531214475631714,
483
- "rewards/margins": 2.2123360633850098,
484
- "rewards/rejected": -3.7654571533203125,
485
  "step": 330
486
  },
487
  {
488
- "epoch": 0.84,
489
- "learning_rate": 3.831895019292897e-08,
490
- "logits/chosen": -2.308152675628662,
491
- "logits/rejected": -2.2120919227600098,
492
- "logps/chosen": -565.0369873046875,
493
- "logps/rejected": -535.8488159179688,
494
- "loss": 0.0642,
495
- "rewards/accuracies": 0.831250011920929,
496
- "rewards/chosen": -1.306236982345581,
497
- "rewards/margins": 2.8749289512634277,
498
- "rewards/rejected": -4.181166172027588,
499
  "step": 340
500
  },
501
  {
502
- "epoch": 0.86,
503
- "learning_rate": 2.764152339909756e-08,
504
- "logits/chosen": -2.245577573776245,
505
- "logits/rejected": -2.1435444355010986,
506
- "logps/chosen": -546.0943603515625,
507
- "logps/rejected": -454.5082092285156,
508
- "loss": 0.0636,
509
- "rewards/accuracies": 0.8500000238418579,
510
- "rewards/chosen": -1.2082496881484985,
511
- "rewards/margins": 2.3495194911956787,
512
- "rewards/rejected": -3.5577690601348877,
513
  "step": 350
514
  },
515
  {
516
- "epoch": 0.89,
517
- "learning_rate": 1.861941317991664e-08,
518
- "logits/chosen": -2.302865505218506,
519
- "logits/rejected": -2.1724164485931396,
520
- "logps/chosen": -559.6376953125,
521
- "logps/rejected": -483.40771484375,
522
- "loss": 0.0675,
523
- "rewards/accuracies": 0.875,
524
- "rewards/chosen": -1.0334274768829346,
525
- "rewards/margins": 2.655003070831299,
526
- "rewards/rejected": -3.6884307861328125,
527
  "step": 360
528
  },
529
  {
530
- "epoch": 0.91,
531
- "learning_rate": 1.13197833728636e-08,
532
- "logits/chosen": -2.2556536197662354,
533
- "logits/rejected": -2.153872013092041,
534
- "logps/chosen": -521.9984130859375,
535
- "logps/rejected": -505.71673583984375,
536
- "loss": 0.06,
537
- "rewards/accuracies": 0.8812500238418579,
538
- "rewards/chosen": -1.1932189464569092,
539
- "rewards/margins": 2.7444043159484863,
540
- "rewards/rejected": -3.9376235008239746,
541
  "step": 370
542
  },
543
  {
544
- "epoch": 0.94,
545
- "learning_rate": 5.79697505093521e-09,
546
- "logits/chosen": -2.2588906288146973,
547
- "logits/rejected": -2.159388303756714,
548
- "logps/chosen": -529.9054565429688,
549
- "logps/rejected": -461.11700439453125,
550
- "loss": 0.0744,
551
- "rewards/accuracies": 0.7749999761581421,
552
- "rewards/chosen": -1.276084065437317,
553
- "rewards/margins": 2.2816543579101562,
554
- "rewards/rejected": -3.5577385425567627,
555
  "step": 380
556
  },
557
  {
558
- "epoch": 0.96,
559
- "learning_rate": 2.092101988131256e-09,
560
- "logits/chosen": -2.313697099685669,
561
- "logits/rejected": -2.171175003051758,
562
- "logps/chosen": -565.225830078125,
563
- "logps/rejected": -489.6360778808594,
564
- "loss": 0.0609,
565
- "rewards/accuracies": 0.893750011920929,
566
- "rewards/chosen": -1.0208733081817627,
567
- "rewards/margins": 2.784264087677002,
568
- "rewards/rejected": -3.8051371574401855,
569
  "step": 390
570
  },
571
  {
572
- "epoch": 0.99,
573
- "learning_rate": 2.327445937151673e-10,
574
- "logits/chosen": -2.29669189453125,
575
- "logits/rejected": -2.1986515522003174,
576
- "logps/chosen": -561.0698852539062,
577
- "logps/rejected": -510.22021484375,
578
- "loss": 0.0666,
579
- "rewards/accuracies": 0.875,
580
- "rewards/chosen": -1.1245156526565552,
581
- "rewards/margins": 2.6927759647369385,
582
- "rewards/rejected": -3.817291736602783,
583
  "step": 400
584
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
  {
586
  "epoch": 1.0,
587
- "step": 405,
588
  "total_flos": 0.0,
589
- "train_loss": 0.13438091388455145,
590
- "train_runtime": 3218.6044,
591
- "train_samples_per_second": 16.123,
592
- "train_steps_per_second": 0.126
593
  }
594
  ],
595
  "logging_steps": 10,
596
- "max_steps": 405,
597
  "num_train_epochs": 1,
598
- "save_steps": 1000,
599
  "total_flos": 0.0,
600
  "trial_name": null,
601
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9994340690435767,
5
+ "eval_steps": 100,
6
+ "global_step": 883,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 5.617977528089887e-09,
14
+ "logits/chosen": -2.763059616088867,
15
+ "logits/rejected": -2.7395401000976562,
16
+ "logps/chosen": -322.45367431640625,
17
+ "logps/rejected": -273.0731506347656,
18
+ "loss": 0.3632,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
21
  "rewards/margins": 0.0,
 
23
  "step": 1
24
  },
25
  {
26
+ "epoch": 0.01,
27
+ "learning_rate": 5.617977528089887e-08,
28
+ "logits/chosen": -2.7944271564483643,
29
+ "logits/rejected": -2.7713630199432373,
30
+ "logps/chosen": -334.48004150390625,
31
+ "logps/rejected": -186.60906982421875,
32
+ "loss": 0.3527,
33
+ "rewards/accuracies": 0.4375,
34
+ "rewards/chosen": 4.6880424633855e-05,
35
+ "rewards/margins": 2.8881140679004602e-05,
36
+ "rewards/rejected": 1.7999276678892784e-05,
37
  "step": 10
38
  },
39
  {
40
+ "epoch": 0.02,
41
+ "learning_rate": 1.1235955056179774e-07,
42
+ "logits/chosen": -2.8209891319274902,
43
+ "logits/rejected": -2.8004016876220703,
44
+ "logps/chosen": -334.3337707519531,
45
+ "logps/rejected": -174.12008666992188,
46
+ "loss": 0.3507,
47
+ "rewards/accuracies": 0.65625,
48
+ "rewards/chosen": 0.0022166508715599775,
49
+ "rewards/margins": 0.003457559272646904,
50
+ "rewards/rejected": -0.001240908750332892,
51
  "step": 20
52
  },
53
  {
54
+ "epoch": 0.03,
55
+ "learning_rate": 1.6853932584269663e-07,
56
+ "logits/chosen": -2.7569632530212402,
57
+ "logits/rejected": -2.7449216842651367,
58
+ "logps/chosen": -318.72857666015625,
59
+ "logps/rejected": -187.9712371826172,
60
+ "loss": 0.3448,
61
+ "rewards/accuracies": 0.637499988079071,
62
+ "rewards/chosen": 0.008794652298092842,
63
+ "rewards/margins": 0.01837952807545662,
64
+ "rewards/rejected": -0.009584875777363777,
65
  "step": 30
66
  },
67
  {
68
+ "epoch": 0.05,
69
+ "learning_rate": 2.2471910112359549e-07,
70
+ "logits/chosen": -2.7657992839813232,
71
+ "logits/rejected": -2.7251369953155518,
72
+ "logps/chosen": -361.417236328125,
73
+ "logps/rejected": -208.3668975830078,
74
+ "loss": 0.3535,
75
+ "rewards/accuracies": 0.668749988079071,
76
+ "rewards/chosen": 0.02502177096903324,
77
+ "rewards/margins": 0.06268725544214249,
78
+ "rewards/rejected": -0.037665486335754395,
79
  "step": 40
80
  },
81
  {
82
+ "epoch": 0.06,
83
+ "learning_rate": 2.8089887640449437e-07,
84
+ "logits/chosen": -2.672497272491455,
85
+ "logits/rejected": -2.6617045402526855,
86
+ "logps/chosen": -290.23529052734375,
87
+ "logps/rejected": -174.43826293945312,
88
+ "loss": 0.3593,
89
+ "rewards/accuracies": 0.6875,
90
+ "rewards/chosen": 0.020228449255228043,
91
+ "rewards/margins": 0.11420907080173492,
92
+ "rewards/rejected": -0.09398062527179718,
93
  "step": 50
94
  },
95
  {
96
+ "epoch": 0.07,
97
+ "learning_rate": 3.3707865168539325e-07,
98
+ "logits/chosen": -2.5486276149749756,
99
+ "logits/rejected": -2.540907621383667,
100
+ "logps/chosen": -329.50079345703125,
101
+ "logps/rejected": -229.45065307617188,
102
+ "loss": 0.3584,
103
+ "rewards/accuracies": 0.643750011920929,
104
+ "rewards/chosen": -0.030313704162836075,
105
+ "rewards/margins": 0.17366722226142883,
106
+ "rewards/rejected": -0.2039809226989746,
107
  "step": 60
108
  },
109
  {
110
+ "epoch": 0.08,
111
+ "learning_rate": 3.9325842696629214e-07,
112
+ "logits/chosen": -2.5654215812683105,
113
+ "logits/rejected": -2.5420703887939453,
114
+ "logps/chosen": -344.0436096191406,
115
+ "logps/rejected": -257.1449890136719,
116
+ "loss": 0.3532,
117
+ "rewards/accuracies": 0.668749988079071,
118
+ "rewards/chosen": -0.13107505440711975,
119
+ "rewards/margins": 0.29146069288253784,
120
+ "rewards/rejected": -0.4225357472896576,
121
  "step": 70
122
  },
123
  {
124
+ "epoch": 0.09,
125
+ "learning_rate": 4.4943820224719097e-07,
126
+ "logits/chosen": -2.470280170440674,
127
+ "logits/rejected": -2.4680607318878174,
128
+ "logps/chosen": -402.6678161621094,
129
+ "logps/rejected": -240.67776489257812,
130
+ "loss": 0.3141,
131
+ "rewards/accuracies": 0.65625,
132
+ "rewards/chosen": -0.19497649371623993,
133
+ "rewards/margins": 0.45426544547080994,
134
+ "rewards/rejected": -0.6492420434951782,
135
  "step": 80
136
  },
137
  {
138
+ "epoch": 0.1,
139
+ "learning_rate": 4.999980431020109e-07,
140
+ "logits/chosen": -2.4644992351531982,
141
+ "logits/rejected": -2.436156749725342,
142
+ "logps/chosen": -366.55352783203125,
143
+ "logps/rejected": -270.9005432128906,
144
+ "loss": 0.2824,
145
+ "rewards/accuracies": 0.7749999761581421,
146
+ "rewards/chosen": -0.16562625765800476,
147
+ "rewards/margins": 0.6153150796890259,
148
+ "rewards/rejected": -0.780941367149353,
149
  "step": 90
150
  },
151
  {
152
+ "epoch": 0.11,
153
+ "learning_rate": 4.997632524101301e-07,
154
+ "logits/chosen": -2.5054173469543457,
155
+ "logits/rejected": -2.465770959854126,
156
+ "logps/chosen": -371.3381042480469,
157
+ "logps/rejected": -287.1797790527344,
158
+ "loss": 0.2588,
159
+ "rewards/accuracies": 0.71875,
160
+ "rewards/chosen": -0.4102350175380707,
161
+ "rewards/margins": 0.5942500829696655,
162
+ "rewards/rejected": -1.0044851303100586,
163
  "step": 100
164
  },
165
  {
166
+ "epoch": 0.11,
167
+ "eval_logits/chosen": -2.527438163757324,
168
+ "eval_logits/rejected": -2.5121896266937256,
169
+ "eval_logps/chosen": -321.84320068359375,
170
+ "eval_logps/rejected": -348.865478515625,
171
+ "eval_loss": 0.25303778052330017,
172
+ "eval_rewards/accuracies": 0.640625,
173
+ "eval_rewards/chosen": -0.6480357646942139,
174
+ "eval_rewards/margins": 0.2670864164829254,
175
+ "eval_rewards/rejected": -0.9151221513748169,
176
+ "eval_runtime": 53.627,
177
+ "eval_samples_per_second": 37.295,
178
+ "eval_steps_per_second": 0.597,
179
+ "step": 100
180
+ },
181
+ {
182
+ "epoch": 0.12,
183
+ "learning_rate": 4.991375032514749e-07,
184
+ "logits/chosen": -2.4942123889923096,
185
+ "logits/rejected": -2.4537932872772217,
186
+ "logps/chosen": -366.0484924316406,
187
+ "logps/rejected": -300.3791198730469,
188
+ "loss": 0.218,
189
+ "rewards/accuracies": 0.699999988079071,
190
+ "rewards/chosen": -0.7532877326011658,
191
+ "rewards/margins": 0.5014825463294983,
192
+ "rewards/rejected": -1.254770278930664,
193
  "step": 110
194
  },
195
  {
196
+ "epoch": 0.14,
197
+ "learning_rate": 4.98121775121344e-07,
198
+ "logits/chosen": -2.4438962936401367,
199
+ "logits/rejected": -2.413790702819824,
200
+ "logps/chosen": -402.3663635253906,
201
+ "logps/rejected": -359.6197509765625,
202
+ "loss": 0.1769,
203
+ "rewards/accuracies": 0.768750011920929,
204
+ "rewards/chosen": -0.6919637322425842,
205
+ "rewards/margins": 0.7776089906692505,
206
+ "rewards/rejected": -1.46957266330719,
207
  "step": 120
208
  },
209
  {
210
+ "epoch": 0.15,
211
+ "learning_rate": 4.96717657955441e-07,
212
+ "logits/chosen": -2.395176649093628,
213
+ "logits/rejected": -2.347350597381592,
214
+ "logps/chosen": -435.8388671875,
215
+ "logps/rejected": -366.80767822265625,
216
+ "loss": 0.157,
217
+ "rewards/accuracies": 0.7124999761581421,
218
+ "rewards/chosen": -0.8537490963935852,
219
+ "rewards/margins": 0.8641785383224487,
220
+ "rewards/rejected": -1.7179279327392578,
221
  "step": 130
222
  },
223
  {
224
+ "epoch": 0.16,
225
+ "learning_rate": 4.949273496411216e-07,
226
+ "logits/chosen": -2.370129346847534,
227
+ "logits/rejected": -2.345968246459961,
228
+ "logps/chosen": -416.231689453125,
229
+ "logps/rejected": -379.8551330566406,
230
+ "loss": 0.1355,
231
+ "rewards/accuracies": 0.7875000238418579,
232
+ "rewards/chosen": -0.9431791305541992,
233
+ "rewards/margins": 0.9395672678947449,
234
+ "rewards/rejected": -1.8827464580535889,
235
  "step": 140
236
  },
237
  {
238
+ "epoch": 0.17,
239
+ "learning_rate": 4.927536525770046e-07,
240
+ "logits/chosen": -2.2446436882019043,
241
+ "logits/rejected": -2.176954507827759,
242
+ "logps/chosen": -439.6024475097656,
243
+ "logps/rejected": -401.255126953125,
244
+ "loss": 0.1276,
245
+ "rewards/accuracies": 0.75,
246
+ "rewards/chosen": -1.096846580505371,
247
+ "rewards/margins": 0.9727560877799988,
248
+ "rewards/rejected": -2.0696027278900146,
249
  "step": 150
250
  },
251
  {
252
+ "epoch": 0.18,
253
+ "learning_rate": 4.901999692863326e-07,
254
+ "logits/chosen": -2.210822582244873,
255
+ "logits/rejected": -2.1662418842315674,
256
+ "logps/chosen": -469.39190673828125,
257
+ "logps/rejected": -381.78662109375,
258
+ "loss": 0.1318,
259
+ "rewards/accuracies": 0.737500011920929,
260
+ "rewards/chosen": -1.149139165878296,
261
+ "rewards/margins": 0.8712302446365356,
262
+ "rewards/rejected": -2.020369291305542,
263
  "step": 160
264
  },
265
  {
266
+ "epoch": 0.19,
267
+ "learning_rate": 4.872702970909464e-07,
268
+ "logits/chosen": -2.2581284046173096,
269
+ "logits/rejected": -2.154148817062378,
270
+ "logps/chosen": -467.570556640625,
271
+ "logps/rejected": -428.92559814453125,
272
+ "loss": 0.1324,
273
+ "rewards/accuracies": 0.7749999761581421,
274
+ "rewards/chosen": -1.0990054607391357,
275
+ "rewards/margins": 1.353104829788208,
276
+ "rewards/rejected": -2.4521100521087646,
277
  "step": 170
278
  },
279
  {
280
+ "epoch": 0.2,
281
+ "learning_rate": 4.839692218542131e-07,
282
+ "logits/chosen": -2.1558468341827393,
283
+ "logits/rejected": -2.092869520187378,
284
+ "logps/chosen": -422.3897399902344,
285
+ "logps/rejected": -405.66558837890625,
286
+ "loss": 0.1499,
287
+ "rewards/accuracies": 0.800000011920929,
288
+ "rewards/chosen": -0.9482347369194031,
289
+ "rewards/margins": 1.135727047920227,
290
+ "rewards/rejected": -2.0839619636535645,
291
  "step": 180
292
  },
293
  {
294
+ "epoch": 0.22,
295
+ "learning_rate": 4.803019108026997e-07,
296
+ "logits/chosen": -2.2160496711730957,
297
+ "logits/rejected": -2.139484405517578,
298
+ "logps/chosen": -448.75732421875,
299
+ "logps/rejected": -376.4322204589844,
300
+ "loss": 0.1547,
301
+ "rewards/accuracies": 0.800000011920929,
302
+ "rewards/chosen": -0.621215283870697,
303
+ "rewards/margins": 1.1775684356689453,
304
+ "rewards/rejected": -1.7987838983535767,
305
  "step": 190
306
  },
307
  {
308
+ "epoch": 0.23,
309
+ "learning_rate": 4.7627410443782887e-07,
310
+ "logits/chosen": -2.189039468765259,
311
+ "logits/rejected": -2.098072052001953,
312
+ "logps/chosen": -424.6959533691406,
313
+ "logps/rejected": -381.8597106933594,
314
+ "loss": 0.1405,
315
+ "rewards/accuracies": 0.737500011920929,
316
+ "rewards/chosen": -1.002191424369812,
317
+ "rewards/margins": 0.9783417582511902,
318
+ "rewards/rejected": -1.980533242225647,
319
  "step": 200
320
  },
321
  {
322
+ "epoch": 0.23,
323
+ "eval_logits/chosen": -2.1455271244049072,
324
+ "eval_logits/rejected": -2.11315655708313,
325
+ "eval_logps/chosen": -420.6826171875,
326
+ "eval_logps/rejected": -474.0294189453125,
327
+ "eval_loss": 0.14227548241615295,
328
+ "eval_rewards/accuracies": 0.70703125,
329
+ "eval_rewards/chosen": -1.6364303827285767,
330
+ "eval_rewards/margins": 0.530331015586853,
331
+ "eval_rewards/rejected": -2.1667611598968506,
332
+ "eval_runtime": 53.4112,
333
+ "eval_samples_per_second": 37.445,
334
+ "eval_steps_per_second": 0.599,
335
+ "step": 200
336
+ },
337
+ {
338
+ "epoch": 0.24,
339
+ "learning_rate": 4.7189210755018034e-07,
340
+ "logits/chosen": -2.1224522590637207,
341
+ "logits/rejected": -2.068533182144165,
342
+ "logps/chosen": -491.70062255859375,
343
+ "logps/rejected": -460.06390380859375,
344
+ "loss": 0.1106,
345
+ "rewards/accuracies": 0.768750011920929,
346
+ "rewards/chosen": -1.3628227710723877,
347
+ "rewards/margins": 1.2269973754882812,
348
+ "rewards/rejected": -2.589820146560669,
349
  "step": 210
350
  },
351
  {
352
+ "epoch": 0.25,
353
+ "learning_rate": 4.671627793504988e-07,
354
+ "logits/chosen": -2.1610889434814453,
355
+ "logits/rejected": -2.081235408782959,
356
+ "logps/chosen": -464.84039306640625,
357
+ "logps/rejected": -423.46044921875,
358
+ "loss": 0.0991,
359
+ "rewards/accuracies": 0.8125,
360
+ "rewards/chosen": -1.2738369703292847,
361
+ "rewards/margins": 1.162742018699646,
362
+ "rewards/rejected": -2.4365792274475098,
363
  "step": 220
364
  },
365
  {
366
+ "epoch": 0.26,
367
+ "learning_rate": 4.6209352273286095e-07,
368
+ "logits/chosen": -2.0840981006622314,
369
+ "logits/rejected": -2.009742259979248,
370
+ "logps/chosen": -487.56707763671875,
371
+ "logps/rejected": -457.1775817871094,
372
+ "loss": 0.1024,
373
+ "rewards/accuracies": 0.7875000238418579,
374
+ "rewards/chosen": -1.396468162536621,
375
+ "rewards/margins": 1.1431598663330078,
376
+ "rewards/rejected": -2.539628267288208,
377
  "step": 230
378
  },
379
  {
380
+ "epoch": 0.27,
381
+ "learning_rate": 4.56692272686805e-07,
382
+ "logits/chosen": -2.0599420070648193,
383
+ "logits/rejected": -1.9713146686553955,
384
+ "logps/chosen": -526.182373046875,
385
+ "logps/rejected": -502.25628662109375,
386
+ "loss": 0.0906,
387
+ "rewards/accuracies": 0.8062499761581421,
388
+ "rewards/chosen": -1.8474916219711304,
389
+ "rewards/margins": 1.3336760997772217,
390
+ "rewards/rejected": -3.1811680793762207,
391
  "step": 240
392
  },
393
  {
394
+ "epoch": 0.28,
395
+ "learning_rate": 4.5096748387656326e-07,
396
+ "logits/chosen": -2.058642625808716,
397
+ "logits/rejected": -1.9906930923461914,
398
+ "logps/chosen": -461.1634216308594,
399
+ "logps/rejected": -468.45086669921875,
400
+ "loss": 0.0905,
401
+ "rewards/accuracies": 0.75,
402
+ "rewards/chosen": -1.5445082187652588,
403
+ "rewards/margins": 1.150217890739441,
404
+ "rewards/rejected": -2.6947262287139893,
405
  "step": 250
406
  },
407
  {
408
+ "epoch": 0.29,
409
+ "learning_rate": 4.4492811740683877e-07,
410
+ "logits/chosen": -2.1584465503692627,
411
+ "logits/rejected": -2.065124988555908,
412
+ "logps/chosen": -509.6869201660156,
413
+ "logps/rejected": -463.35711669921875,
414
+ "loss": 0.089,
415
+ "rewards/accuracies": 0.800000011920929,
416
+ "rewards/chosen": -1.6028919219970703,
417
+ "rewards/margins": 1.1219018697738647,
418
+ "rewards/rejected": -2.7247939109802246,
419
  "step": 260
420
  },
421
  {
422
+ "epoch": 0.31,
423
+ "learning_rate": 4.3858362679584354e-07,
424
+ "logits/chosen": -2.0787439346313477,
425
+ "logits/rejected": -1.9619861841201782,
426
+ "logps/chosen": -476.6595764160156,
427
+ "logps/rejected": -454.09503173828125,
428
+ "loss": 0.0805,
429
+ "rewards/accuracies": 0.75,
430
+ "rewards/chosen": -1.6048253774642944,
431
+ "rewards/margins": 1.1343061923980713,
432
+ "rewards/rejected": -2.739131450653076,
433
  "step": 270
434
  },
435
  {
436
+ "epoch": 0.32,
437
+ "learning_rate": 4.3194394317755245e-07,
438
+ "logits/chosen": -2.058042049407959,
439
+ "logits/rejected": -1.97593092918396,
440
+ "logps/chosen": -452.4337463378906,
441
+ "logps/rejected": -450.2781677246094,
442
+ "loss": 0.0989,
443
+ "rewards/accuracies": 0.731249988079071,
444
+ "rewards/chosen": -1.5429315567016602,
445
+ "rewards/margins": 1.1663429737091064,
446
+ "rewards/rejected": -2.7092742919921875,
447
  "step": 280
448
  },
449
  {
450
+ "epoch": 0.33,
451
+ "learning_rate": 4.2501945975633914e-07,
452
+ "logits/chosen": -2.0520236492156982,
453
+ "logits/rejected": -1.9288972616195679,
454
+ "logps/chosen": -527.3427124023438,
455
+ "logps/rejected": -463.68621826171875,
456
+ "loss": 0.0823,
457
+ "rewards/accuracies": 0.768750011920929,
458
+ "rewards/chosen": -1.7575832605361938,
459
+ "rewards/margins": 1.1834232807159424,
460
+ "rewards/rejected": -2.9410064220428467,
461
  "step": 290
462
  },
463
  {
464
+ "epoch": 0.34,
465
+ "learning_rate": 4.1782101553832405e-07,
466
+ "logits/chosen": -1.9703317880630493,
467
+ "logits/rejected": -1.8722751140594482,
468
+ "logps/chosen": -562.42333984375,
469
+ "logps/rejected": -565.2551879882812,
470
+ "loss": 0.0841,
471
+ "rewards/accuracies": 0.8062499761581421,
472
+ "rewards/chosen": -2.0587549209594727,
473
+ "rewards/margins": 1.456610918045044,
474
+ "rewards/rejected": -3.5153656005859375,
475
  "step": 300
476
  },
477
  {
478
+ "epoch": 0.34,
479
+ "eval_logits/chosen": -2.004652500152588,
480
+ "eval_logits/rejected": -1.9653990268707275,
481
+ "eval_logps/chosen": -485.7205505371094,
482
+ "eval_logps/rejected": -562.75634765625,
483
+ "eval_loss": 0.10304867476224899,
484
+ "eval_rewards/accuracies": 0.73828125,
485
+ "eval_rewards/chosen": -2.286808967590332,
486
+ "eval_rewards/margins": 0.7672209739685059,
487
+ "eval_rewards/rejected": -3.054029941558838,
488
+ "eval_runtime": 53.4196,
489
+ "eval_samples_per_second": 37.439,
490
+ "eval_steps_per_second": 0.599,
491
+ "step": 300
492
+ },
493
+ {
494
+ "epoch": 0.35,
495
+ "learning_rate": 4.103598783649029e-07,
496
+ "logits/chosen": -2.0496602058410645,
497
+ "logits/rejected": -1.9651731252670288,
498
+ "logps/chosen": -505.4261169433594,
499
+ "logps/rejected": -485.68310546875,
500
+ "loss": 0.0845,
501
+ "rewards/accuracies": 0.78125,
502
+ "rewards/chosen": -1.727452039718628,
503
+ "rewards/margins": 1.2490873336791992,
504
+ "rewards/rejected": -2.976539134979248,
505
  "step": 310
506
  },
507
  {
508
+ "epoch": 0.36,
509
+ "learning_rate": 4.026477272750119e-07,
510
+ "logits/chosen": -2.020448684692383,
511
+ "logits/rejected": -1.9249324798583984,
512
+ "logps/chosen": -544.9754638671875,
513
+ "logps/rejected": -536.6004028320312,
514
+ "loss": 0.0727,
515
+ "rewards/accuracies": 0.824999988079071,
516
+ "rewards/chosen": -1.8779780864715576,
517
+ "rewards/margins": 1.4059292078018188,
518
+ "rewards/rejected": -3.283907413482666,
519
  "step": 320
520
  },
521
  {
522
+ "epoch": 0.37,
523
+ "learning_rate": 3.9469663422373864e-07,
524
+ "logits/chosen": -2.028433322906494,
525
+ "logits/rejected": -1.9563522338867188,
526
+ "logps/chosen": -511.7374572753906,
527
+ "logps/rejected": -506.9154357910156,
528
+ "loss": 0.0726,
529
+ "rewards/accuracies": 0.762499988079071,
530
+ "rewards/chosen": -2.1345508098602295,
531
+ "rewards/margins": 1.1790883541107178,
532
+ "rewards/rejected": -3.3136394023895264,
533
  "step": 330
534
  },
535
  {
536
+ "epoch": 0.38,
537
+ "learning_rate": 3.865190451858954e-07,
538
+ "logits/chosen": -1.9980262517929077,
539
+ "logits/rejected": -1.9208223819732666,
540
+ "logps/chosen": -555.2127075195312,
541
+ "logps/rejected": -533.1873779296875,
542
+ "loss": 0.0701,
543
+ "rewards/accuracies": 0.793749988079071,
544
+ "rewards/chosen": -1.9280259609222412,
545
+ "rewards/margins": 1.4416617155075073,
546
+ "rewards/rejected": -3.369687557220459,
547
  "step": 340
548
  },
549
  {
550
+ "epoch": 0.4,
551
+ "learning_rate": 3.781277606741327e-07,
552
+ "logits/chosen": -2.0005669593811035,
553
+ "logits/rejected": -1.92441725730896,
554
+ "logps/chosen": -493.4820251464844,
555
+ "logps/rejected": -511.8729553222656,
556
+ "loss": 0.0836,
557
+ "rewards/accuracies": 0.78125,
558
+ "rewards/chosen": -2.063403606414795,
559
+ "rewards/margins": 1.1460031270980835,
560
+ "rewards/rejected": -3.209406614303589,
561
  "step": 350
562
  },
563
  {
564
+ "epoch": 0.41,
565
+ "learning_rate": 3.6953591570208996e-07,
566
+ "logits/chosen": -2.074608325958252,
567
+ "logits/rejected": -1.983313798904419,
568
+ "logps/chosen": -513.7328491210938,
569
+ "logps/rejected": -485.75518798828125,
570
+ "loss": 0.0911,
571
+ "rewards/accuracies": 0.7749999761581421,
572
+ "rewards/chosen": -1.652430772781372,
573
+ "rewards/margins": 1.3198424577713013,
574
+ "rewards/rejected": -2.9722726345062256,
575
  "step": 360
576
  },
577
  {
578
+ "epoch": 0.42,
579
+ "learning_rate": 3.607569592239452e-07,
580
+ "logits/chosen": -1.9775346517562866,
581
+ "logits/rejected": -1.8758798837661743,
582
+ "logps/chosen": -490.39208984375,
583
+ "logps/rejected": -476.412109375,
584
+ "loss": 0.0983,
585
+ "rewards/accuracies": 0.824999988079071,
586
+ "rewards/chosen": -1.5720148086547852,
587
+ "rewards/margins": 1.3585755825042725,
588
+ "rewards/rejected": -2.9305903911590576,
589
  "step": 370
590
  },
591
  {
592
+ "epoch": 0.43,
593
+ "learning_rate": 3.518046330825494e-07,
594
+ "logits/chosen": -1.9943759441375732,
595
+ "logits/rejected": -1.9340169429779053,
596
+ "logps/chosen": -556.3590087890625,
597
+ "logps/rejected": -578.3414306640625,
598
+ "loss": 0.0731,
599
+ "rewards/accuracies": 0.762499988079071,
600
+ "rewards/chosen": -2.216615676879883,
601
+ "rewards/margins": 1.2497565746307373,
602
+ "rewards/rejected": -3.46637225151062,
603
  "step": 380
604
  },
605
  {
606
+ "epoch": 0.44,
607
+ "learning_rate": 3.4269295049909713e-07,
608
+ "logits/chosen": -1.9704225063323975,
609
+ "logits/rejected": -1.9079980850219727,
610
+ "logps/chosen": -492.43743896484375,
611
+ "logps/rejected": -508.32781982421875,
612
+ "loss": 0.0893,
613
+ "rewards/accuracies": 0.75,
614
+ "rewards/chosen": -1.9127576351165771,
615
+ "rewards/margins": 1.1991008520126343,
616
+ "rewards/rejected": -3.111858367919922,
617
  "step": 390
618
  },
619
  {
620
+ "epoch": 0.45,
621
+ "learning_rate": 3.3343617413800453e-07,
622
+ "logits/chosen": -2.0571470260620117,
623
+ "logits/rejected": -1.9935848712921143,
624
+ "logps/chosen": -523.1668090820312,
625
+ "logps/rejected": -481.21844482421875,
626
+ "loss": 0.0916,
627
+ "rewards/accuracies": 0.793749988079071,
628
+ "rewards/chosen": -1.6335818767547607,
629
+ "rewards/margins": 1.325240969657898,
630
+ "rewards/rejected": -2.958822727203369,
631
  "step": 400
632
  },
633
+ {
634
+ "epoch": 0.45,
635
+ "eval_logits/chosen": -1.9811797142028809,
636
+ "eval_logits/rejected": -1.9393850564956665,
637
+ "eval_logps/chosen": -486.5964660644531,
638
+ "eval_logps/rejected": -566.7339477539062,
639
+ "eval_loss": 0.10801155120134354,
640
+ "eval_rewards/accuracies": 0.734375,
641
+ "eval_rewards/chosen": -2.2955682277679443,
642
+ "eval_rewards/margins": 0.7982385158538818,
643
+ "eval_rewards/rejected": -3.093806743621826,
644
+ "eval_runtime": 53.4276,
645
+ "eval_samples_per_second": 37.434,
646
+ "eval_steps_per_second": 0.599,
647
+ "step": 400
648
+ },
649
+ {
650
+ "epoch": 0.46,
651
+ "learning_rate": 3.2404879378132893e-07,
652
+ "logits/chosen": -1.9247922897338867,
653
+ "logits/rejected": -1.856414794921875,
654
+ "logps/chosen": -511.5186462402344,
655
+ "logps/rejected": -560.82177734375,
656
+ "loss": 0.0768,
657
+ "rewards/accuracies": 0.8125,
658
+ "rewards/chosen": -2.140298366546631,
659
+ "rewards/margins": 1.2967740297317505,
660
+ "rewards/rejected": -3.437072277069092,
661
+ "step": 410
662
+ },
663
+ {
664
+ "epoch": 0.48,
665
+ "learning_rate": 3.1454550364767894e-07,
666
+ "logits/chosen": -1.8825881481170654,
667
+ "logits/rejected": -1.7626222372055054,
668
+ "logps/chosen": -580.4396362304688,
669
+ "logps/rejected": -558.7975463867188,
670
+ "loss": 0.0629,
671
+ "rewards/accuracies": 0.7875000238418579,
672
+ "rewards/chosen": -2.2527565956115723,
673
+ "rewards/margins": 1.5961545705795288,
674
+ "rewards/rejected": -3.848911762237549,
675
+ "step": 420
676
+ },
677
+ {
678
+ "epoch": 0.49,
679
+ "learning_rate": 3.049411793911154e-07,
680
+ "logits/chosen": -1.886850357055664,
681
+ "logits/rejected": -1.7896614074707031,
682
+ "logps/chosen": -581.1607055664062,
683
+ "logps/rejected": -578.7238159179688,
684
+ "loss": 0.074,
685
+ "rewards/accuracies": 0.7749999761581421,
686
+ "rewards/chosen": -2.2639479637145996,
687
+ "rewards/margins": 1.3395296335220337,
688
+ "rewards/rejected": -3.6034774780273438,
689
+ "step": 430
690
+ },
691
+ {
692
+ "epoch": 0.5,
693
+ "learning_rate": 2.9525085481604914e-07,
694
+ "logits/chosen": -1.9346811771392822,
695
+ "logits/rejected": -1.8494796752929688,
696
+ "logps/chosen": -539.7020263671875,
697
+ "logps/rejected": -548.3917846679688,
698
+ "loss": 0.0813,
699
+ "rewards/accuracies": 0.8812500238418579,
700
+ "rewards/chosen": -1.9064857959747314,
701
+ "rewards/margins": 1.5988764762878418,
702
+ "rewards/rejected": -3.5053622722625732,
703
+ "step": 440
704
+ },
705
+ {
706
+ "epoch": 0.51,
707
+ "learning_rate": 2.854896983445833e-07,
708
+ "logits/chosen": -1.9939569234848022,
709
+ "logits/rejected": -1.8174670934677124,
710
+ "logps/chosen": -547.39111328125,
711
+ "logps/rejected": -516.7273559570312,
712
+ "loss": 0.0755,
713
+ "rewards/accuracies": 0.78125,
714
+ "rewards/chosen": -1.8019745349884033,
715
+ "rewards/margins": 1.4809798002243042,
716
+ "rewards/rejected": -3.282953977584839,
717
+ "step": 450
718
+ },
719
+ {
720
+ "epoch": 0.52,
721
+ "learning_rate": 2.7567298927313654e-07,
722
+ "logits/chosen": -1.941044807434082,
723
+ "logits/rejected": -1.8167743682861328,
724
+ "logps/chosen": -517.2515869140625,
725
+ "logps/rejected": -488.5101013183594,
726
+ "loss": 0.0802,
727
+ "rewards/accuracies": 0.800000011920929,
728
+ "rewards/chosen": -1.866037130355835,
729
+ "rewards/margins": 1.3603498935699463,
730
+ "rewards/rejected": -3.2263870239257812,
731
+ "step": 460
732
+ },
733
+ {
734
+ "epoch": 0.53,
735
+ "learning_rate": 2.658160938555123e-07,
736
+ "logits/chosen": -1.8934457302093506,
737
+ "logits/rejected": -1.7888898849487305,
738
+ "logps/chosen": -532.8482055664062,
739
+ "logps/rejected": -564.9197998046875,
740
+ "loss": 0.0723,
741
+ "rewards/accuracies": 0.7749999761581421,
742
+ "rewards/chosen": -2.034968852996826,
743
+ "rewards/margins": 1.6071611642837524,
744
+ "rewards/rejected": -3.642129898071289,
745
+ "step": 470
746
+ },
747
+ {
748
+ "epoch": 0.54,
749
+ "learning_rate": 2.559344412498532e-07,
750
+ "logits/chosen": -1.9257526397705078,
751
+ "logits/rejected": -1.8294801712036133,
752
+ "logps/chosen": -558.8524169921875,
753
+ "logps/rejected": -521.4874877929688,
754
+ "loss": 0.0725,
755
+ "rewards/accuracies": 0.7749999761581421,
756
+ "rewards/chosen": -2.1087284088134766,
757
+ "rewards/margins": 1.3520355224609375,
758
+ "rewards/rejected": -3.460763454437256,
759
+ "step": 480
760
+ },
761
+ {
762
+ "epoch": 0.55,
763
+ "learning_rate": 2.460434993671294e-07,
764
+ "logits/chosen": -1.948136329650879,
765
+ "logits/rejected": -1.8431456089019775,
766
+ "logps/chosen": -519.21728515625,
767
+ "logps/rejected": -500.375,
768
+ "loss": 0.0792,
769
+ "rewards/accuracies": 0.7562500238418579,
770
+ "rewards/chosen": -1.9937260150909424,
771
+ "rewards/margins": 1.335012674331665,
772
+ "rewards/rejected": -3.3287386894226074,
773
+ "step": 490
774
+ },
775
+ {
776
+ "epoch": 0.57,
777
+ "learning_rate": 2.361587506589672e-07,
778
+ "logits/chosen": -1.966451644897461,
779
+ "logits/rejected": -1.8704181909561157,
780
+ "logps/chosen": -534.3973388671875,
781
+ "logps/rejected": -500.2060546875,
782
+ "loss": 0.0864,
783
+ "rewards/accuracies": 0.7562500238418579,
784
+ "rewards/chosen": -1.7493633031845093,
785
+ "rewards/margins": 1.3577125072479248,
786
+ "rewards/rejected": -3.1070759296417236,
787
+ "step": 500
788
+ },
789
+ {
790
+ "epoch": 0.57,
791
+ "eval_logits/chosen": -1.955956220626831,
792
+ "eval_logits/rejected": -1.9095466136932373,
793
+ "eval_logps/chosen": -488.0345153808594,
794
+ "eval_logps/rejected": -570.5159912109375,
795
+ "eval_loss": 0.09556370228528976,
796
+ "eval_rewards/accuracies": 0.74609375,
797
+ "eval_rewards/chosen": -2.309948682785034,
798
+ "eval_rewards/margins": 0.8216789960861206,
799
+ "eval_rewards/rejected": -3.1316275596618652,
800
+ "eval_runtime": 53.4131,
801
+ "eval_samples_per_second": 37.444,
802
+ "eval_steps_per_second": 0.599,
803
+ "step": 500
804
+ },
805
+ {
806
+ "epoch": 0.58,
807
+ "learning_rate": 2.2629566788271613e-07,
808
+ "logits/chosen": -1.9134633541107178,
809
+ "logits/rejected": -1.8345638513565063,
810
+ "logps/chosen": -539.9205932617188,
811
+ "logps/rejected": -500.9922790527344,
812
+ "loss": 0.0694,
813
+ "rewards/accuracies": 0.768750011920929,
814
+ "rewards/chosen": -2.060006618499756,
815
+ "rewards/margins": 1.1839641332626343,
816
+ "rewards/rejected": -3.2439708709716797,
817
+ "step": 510
818
+ },
819
+ {
820
+ "epoch": 0.59,
821
+ "learning_rate": 2.1646968988169135e-07,
822
+ "logits/chosen": -1.9419372081756592,
823
+ "logits/rejected": -1.8542063236236572,
824
+ "logps/chosen": -510.229736328125,
825
+ "logps/rejected": -508.4148864746094,
826
+ "loss": 0.0739,
827
+ "rewards/accuracies": 0.831250011920929,
828
+ "rewards/chosen": -1.9082494974136353,
829
+ "rewards/margins": 1.5180120468139648,
830
+ "rewards/rejected": -3.4262614250183105,
831
+ "step": 520
832
+ },
833
+ {
834
+ "epoch": 0.6,
835
+ "learning_rate": 2.0669619741850232e-07,
836
+ "logits/chosen": -1.8845760822296143,
837
+ "logits/rejected": -1.7653076648712158,
838
+ "logps/chosen": -569.1829833984375,
839
+ "logps/rejected": -555.857666015625,
840
+ "loss": 0.0678,
841
+ "rewards/accuracies": 0.7875000238418579,
842
+ "rewards/chosen": -2.045509099960327,
843
+ "rewards/margins": 1.7232000827789307,
844
+ "rewards/rejected": -3.768709182739258,
845
+ "step": 530
846
+ },
847
+ {
848
+ "epoch": 0.61,
849
+ "learning_rate": 1.9699048909929518e-07,
850
+ "logits/chosen": -1.9015051126480103,
851
+ "logits/rejected": -1.8490610122680664,
852
+ "logps/chosen": -547.8323974609375,
853
+ "logps/rejected": -538.3102416992188,
854
+ "loss": 0.0707,
855
+ "rewards/accuracies": 0.737500011920929,
856
+ "rewards/chosen": -2.1869750022888184,
857
+ "rewards/margins": 1.2785483598709106,
858
+ "rewards/rejected": -3.4655234813690186,
859
+ "step": 540
860
+ },
861
+ {
862
+ "epoch": 0.62,
863
+ "learning_rate": 1.8736775742659732e-07,
864
+ "logits/chosen": -1.981376051902771,
865
+ "logits/rejected": -1.8454633951187134,
866
+ "logps/chosen": -589.72509765625,
867
+ "logps/rejected": -503.4346618652344,
868
+ "loss": 0.073,
869
+ "rewards/accuracies": 0.75,
870
+ "rewards/chosen": -1.9054787158966064,
871
+ "rewards/margins": 1.4781739711761475,
872
+ "rewards/rejected": -3.383652925491333,
873
+ "step": 550
874
+ },
875
+ {
876
+ "epoch": 0.63,
877
+ "learning_rate": 1.7784306501824616e-07,
878
+ "logits/chosen": -1.968064308166504,
879
+ "logits/rejected": -1.878286361694336,
880
+ "logps/chosen": -508.1175842285156,
881
+ "logps/rejected": -488.2872619628906,
882
+ "loss": 0.0735,
883
+ "rewards/accuracies": 0.7437499761581421,
884
+ "rewards/chosen": -2.008474826812744,
885
+ "rewards/margins": 1.3027125597000122,
886
+ "rewards/rejected": -3.311187267303467,
887
+ "step": 560
888
+ },
889
+ {
890
+ "epoch": 0.65,
891
+ "learning_rate": 1.6843132102963025e-07,
892
+ "logits/chosen": -1.9995759725570679,
893
+ "logits/rejected": -1.8707059621810913,
894
+ "logps/chosen": -571.5035400390625,
895
+ "logps/rejected": -551.8418579101562,
896
+ "loss": 0.0724,
897
+ "rewards/accuracies": 0.78125,
898
+ "rewards/chosen": -2.143575429916382,
899
+ "rewards/margins": 1.4678400754928589,
900
+ "rewards/rejected": -3.611415386199951,
901
+ "step": 570
902
+ },
903
+ {
904
+ "epoch": 0.66,
905
+ "learning_rate": 1.591472578161458e-07,
906
+ "logits/chosen": -1.9438421726226807,
907
+ "logits/rejected": -1.8859055042266846,
908
+ "logps/chosen": -556.9771728515625,
909
+ "logps/rejected": -608.1251220703125,
910
+ "loss": 0.0776,
911
+ "rewards/accuracies": 0.800000011920929,
912
+ "rewards/chosen": -2.206200122833252,
913
+ "rewards/margins": 1.4931509494781494,
914
+ "rewards/rejected": -3.6993508338928223,
915
+ "step": 580
916
+ },
917
+ {
918
+ "epoch": 0.67,
919
+ "learning_rate": 1.5000540787240274e-07,
920
+ "logits/chosen": -1.992211937904358,
921
+ "logits/rejected": -1.881553053855896,
922
+ "logps/chosen": -552.2855224609375,
923
+ "logps/rejected": -532.5646362304688,
924
+ "loss": 0.0643,
925
+ "rewards/accuracies": 0.7749999761581421,
926
+ "rewards/chosen": -2.1940808296203613,
927
+ "rewards/margins": 1.3729619979858398,
928
+ "rewards/rejected": -3.567042589187622,
929
+ "step": 590
930
+ },
931
+ {
932
+ "epoch": 0.68,
933
+ "learning_rate": 1.410200810842749e-07,
934
+ "logits/chosen": -1.9430984258651733,
935
+ "logits/rejected": -1.8172378540039062,
936
+ "logps/chosen": -613.967529296875,
937
+ "logps/rejected": -598.5076904296875,
938
+ "loss": 0.065,
939
+ "rewards/accuracies": 0.800000011920929,
940
+ "rewards/chosen": -2.2278945446014404,
941
+ "rewards/margins": 1.6444116830825806,
942
+ "rewards/rejected": -3.8723063468933105,
943
+ "step": 600
944
+ },
945
+ {
946
+ "epoch": 0.68,
947
+ "eval_logits/chosen": -1.9405471086502075,
948
+ "eval_logits/rejected": -1.886796236038208,
949
+ "eval_logps/chosen": -542.6825561523438,
950
+ "eval_logps/rejected": -633.113525390625,
951
+ "eval_loss": 0.08488854765892029,
952
+ "eval_rewards/accuracies": 0.7265625,
953
+ "eval_rewards/chosen": -2.856428861618042,
954
+ "eval_rewards/margins": 0.9011733531951904,
955
+ "eval_rewards/rejected": -3.7576024532318115,
956
+ "eval_runtime": 53.397,
957
+ "eval_samples_per_second": 37.455,
958
+ "eval_steps_per_second": 0.599,
959
+ "step": 600
960
+ },
961
+ {
962
+ "epoch": 0.69,
963
+ "learning_rate": 1.322053423294041e-07,
964
+ "logits/chosen": -1.989729881286621,
965
+ "logits/rejected": -1.911266565322876,
966
+ "logps/chosen": -580.957275390625,
967
+ "logps/rejected": -575.7573852539062,
968
+ "loss": 0.0664,
969
+ "rewards/accuracies": 0.762499988079071,
970
+ "rewards/chosen": -2.4196839332580566,
971
+ "rewards/margins": 1.2277535200119019,
972
+ "rewards/rejected": -3.647437334060669,
973
+ "step": 610
974
+ },
975
+ {
976
+ "epoch": 0.7,
977
+ "learning_rate": 1.2357498946121905e-07,
978
+ "logits/chosen": -1.930645227432251,
979
+ "logits/rejected": -1.8115425109863281,
980
+ "logps/chosen": -580.1464233398438,
981
+ "logps/rejected": -578.2677612304688,
982
+ "loss": 0.0632,
983
+ "rewards/accuracies": 0.8187500238418579,
984
+ "rewards/chosen": -2.2879867553710938,
985
+ "rewards/margins": 1.716691017150879,
986
+ "rewards/rejected": -4.004677772521973,
987
+ "step": 620
988
+ },
989
+ {
990
+ "epoch": 0.71,
991
+ "learning_rate": 1.1514253171093161e-07,
992
+ "logits/chosen": -1.9917194843292236,
993
+ "logits/rejected": -1.8586351871490479,
994
+ "logps/chosen": -593.2454833984375,
995
+ "logps/rejected": -554.43115234375,
996
+ "loss": 0.0681,
997
+ "rewards/accuracies": 0.7749999761581421,
998
+ "rewards/chosen": -2.304713726043701,
999
+ "rewards/margins": 1.4881376028060913,
1000
+ "rewards/rejected": -3.792851209640503,
1001
+ "step": 630
1002
+ },
1003
+ {
1004
+ "epoch": 0.72,
1005
+ "learning_rate": 1.0692116854131883e-07,
1006
+ "logits/chosen": -1.963966727256775,
1007
+ "logits/rejected": -1.876705527305603,
1008
+ "logps/chosen": -572.40625,
1009
+ "logps/rejected": -571.9288940429688,
1010
+ "loss": 0.07,
1011
+ "rewards/accuracies": 0.7875000238418579,
1012
+ "rewards/chosen": -2.363751173019409,
1013
+ "rewards/margins": 1.4736993312835693,
1014
+ "rewards/rejected": -3.8374505043029785,
1015
+ "step": 640
1016
+ },
1017
+ {
1018
+ "epoch": 0.74,
1019
+ "learning_rate": 9.89237689853889e-08,
1020
+ "logits/chosen": -1.9835160970687866,
1021
+ "logits/rejected": -1.9168879985809326,
1022
+ "logps/chosen": -562.7428588867188,
1023
+ "logps/rejected": -593.8363037109375,
1024
+ "loss": 0.0689,
1025
+ "rewards/accuracies": 0.768750011920929,
1026
+ "rewards/chosen": -2.1839652061462402,
1027
+ "rewards/margins": 1.3724231719970703,
1028
+ "rewards/rejected": -3.5563888549804688,
1029
+ "step": 650
1030
+ },
1031
+ {
1032
+ "epoch": 0.75,
1033
+ "learning_rate": 9.11628515022765e-08,
1034
+ "logits/chosen": -2.0633797645568848,
1035
+ "logits/rejected": -1.9203882217407227,
1036
+ "logps/chosen": -586.35009765625,
1037
+ "logps/rejected": -529.8253173828125,
1038
+ "loss": 0.0734,
1039
+ "rewards/accuracies": 0.800000011920929,
1040
+ "rewards/chosen": -2.0526123046875,
1041
+ "rewards/margins": 1.624458909034729,
1042
+ "rewards/rejected": -3.6770706176757812,
1043
+ "step": 660
1044
+ },
1045
+ {
1046
+ "epoch": 0.76,
1047
+ "learning_rate": 8.365056438189486e-08,
1048
+ "logits/chosen": -2.0234529972076416,
1049
+ "logits/rejected": -1.9365705251693726,
1050
+ "logps/chosen": -527.53662109375,
1051
+ "logps/rejected": -542.6231689453125,
1052
+ "loss": 0.0723,
1053
+ "rewards/accuracies": 0.731249988079071,
1054
+ "rewards/chosen": -2.2389750480651855,
1055
+ "rewards/margins": 1.2195186614990234,
1056
+ "rewards/rejected": -3.45849347114563,
1057
+ "step": 670
1058
+ },
1059
+ {
1060
+ "epoch": 0.77,
1061
+ "learning_rate": 7.639866672902101e-08,
1062
+ "logits/chosen": -1.9303470849990845,
1063
+ "logits/rejected": -1.8580068349838257,
1064
+ "logps/chosen": -554.188720703125,
1065
+ "logps/rejected": -569.6090698242188,
1066
+ "loss": 0.0703,
1067
+ "rewards/accuracies": 0.762499988079071,
1068
+ "rewards/chosen": -2.2495415210723877,
1069
+ "rewards/margins": 1.5471585988998413,
1070
+ "rewards/rejected": -3.7967002391815186,
1071
+ "step": 680
1072
+ },
1073
+ {
1074
+ "epoch": 0.78,
1075
+ "learning_rate": 6.941851005657851e-08,
1076
+ "logits/chosen": -2.0182881355285645,
1077
+ "logits/rejected": -1.9131933450698853,
1078
+ "logps/chosen": -571.5795288085938,
1079
+ "logps/rejected": -563.0739135742188,
1080
+ "loss": 0.0668,
1081
+ "rewards/accuracies": 0.8062499761581421,
1082
+ "rewards/chosen": -2.189399242401123,
1083
+ "rewards/margins": 1.6238410472869873,
1084
+ "rewards/rejected": -3.8132405281066895,
1085
+ "step": 690
1086
+ },
1087
+ {
1088
+ "epoch": 0.79,
1089
+ "learning_rate": 6.272102051693051e-08,
1090
+ "logits/chosen": -1.9295063018798828,
1091
+ "logits/rejected": -1.8629966974258423,
1092
+ "logps/chosen": -551.6529541015625,
1093
+ "logps/rejected": -583.9489135742188,
1094
+ "loss": 0.0663,
1095
+ "rewards/accuracies": 0.7749999761581421,
1096
+ "rewards/chosen": -2.4394476413726807,
1097
+ "rewards/margins": 1.471626877784729,
1098
+ "rewards/rejected": -3.91107439994812,
1099
+ "step": 700
1100
+ },
1101
+ {
1102
+ "epoch": 0.79,
1103
+ "eval_logits/chosen": -1.962702751159668,
1104
+ "eval_logits/rejected": -1.9099680185317993,
1105
+ "eval_logps/chosen": -538.3110961914062,
1106
+ "eval_logps/rejected": -630.755615234375,
1107
+ "eval_loss": 0.08400725573301315,
1108
+ "eval_rewards/accuracies": 0.73828125,
1109
+ "eval_rewards/chosen": -2.812714099884033,
1110
+ "eval_rewards/margins": 0.921308696269989,
1111
+ "eval_rewards/rejected": -3.734023094177246,
1112
+ "eval_runtime": 53.3939,
1113
+ "eval_samples_per_second": 37.457,
1114
+ "eval_steps_per_second": 0.599,
1115
+ "step": 700
1116
+ },
1117
+ {
1118
+ "epoch": 0.8,
1119
+ "learning_rate": 5.6316681798995844e-08,
1120
+ "logits/chosen": -1.998659372329712,
1121
+ "logits/rejected": -1.9016752243041992,
1122
+ "logps/chosen": -596.6257934570312,
1123
+ "logps/rejected": -589.1134033203125,
1124
+ "loss": 0.0751,
1125
+ "rewards/accuracies": 0.793749988079071,
1126
+ "rewards/chosen": -2.465219020843506,
1127
+ "rewards/margins": 1.4250514507293701,
1128
+ "rewards/rejected": -3.890270948410034,
1129
+ "step": 710
1130
+ },
1131
+ {
1132
+ "epoch": 0.81,
1133
+ "learning_rate": 5.0215518717961256e-08,
1134
+ "logits/chosen": -1.9532493352890015,
1135
+ "logits/rejected": -1.861000418663025,
1136
+ "logps/chosen": -530.2694091796875,
1137
+ "logps/rejected": -542.3233642578125,
1138
+ "loss": 0.0666,
1139
+ "rewards/accuracies": 0.737500011920929,
1140
+ "rewards/chosen": -2.3267478942871094,
1141
+ "rewards/margins": 1.403226613998413,
1142
+ "rewards/rejected": -3.7299742698669434,
1143
+ "step": 720
1144
+ },
1145
+ {
1146
+ "epoch": 0.83,
1147
+ "learning_rate": 4.4427081523275925e-08,
1148
+ "logits/chosen": -1.9771426916122437,
1149
+ "logits/rejected": -1.8616434335708618,
1150
+ "logps/chosen": -516.3856201171875,
1151
+ "logps/rejected": -545.8884887695312,
1152
+ "loss": 0.0705,
1153
+ "rewards/accuracies": 0.7875000238418579,
1154
+ "rewards/chosen": -2.18568754196167,
1155
+ "rewards/margins": 1.4953739643096924,
1156
+ "rewards/rejected": -3.6810615062713623,
1157
+ "step": 730
1158
+ },
1159
+ {
1160
+ "epoch": 0.84,
1161
+ "learning_rate": 3.896043094949061e-08,
1162
+ "logits/chosen": -1.9744873046875,
1163
+ "logits/rejected": -1.8698228597640991,
1164
+ "logps/chosen": -564.7574462890625,
1165
+ "logps/rejected": -554.6087036132812,
1166
+ "loss": 0.0707,
1167
+ "rewards/accuracies": 0.762499988079071,
1168
+ "rewards/chosen": -2.351306438446045,
1169
+ "rewards/margins": 1.3112602233886719,
1170
+ "rewards/rejected": -3.662567138671875,
1171
+ "step": 740
1172
+ },
1173
+ {
1174
+ "epoch": 0.85,
1175
+ "learning_rate": 3.3824124033343557e-08,
1176
+ "logits/chosen": -1.971381425857544,
1177
+ "logits/rejected": -1.8558752536773682,
1178
+ "logps/chosen": -550.445556640625,
1179
+ "logps/rejected": -536.169921875,
1180
+ "loss": 0.0663,
1181
+ "rewards/accuracies": 0.768750011920929,
1182
+ "rewards/chosen": -2.283639430999756,
1183
+ "rewards/margins": 1.291291356086731,
1184
+ "rewards/rejected": -3.5749306678771973,
1185
+ "step": 750
1186
+ },
1187
+ {
1188
+ "epoch": 0.86,
1189
+ "learning_rate": 2.9026200719291904e-08,
1190
+ "logits/chosen": -1.8978159427642822,
1191
+ "logits/rejected": -1.8259022235870361,
1192
+ "logps/chosen": -486.37152099609375,
1193
+ "logps/rejected": -516.701416015625,
1194
+ "loss": 0.0672,
1195
+ "rewards/accuracies": 0.8062499761581421,
1196
+ "rewards/chosen": -2.1406826972961426,
1197
+ "rewards/margins": 1.2746713161468506,
1198
+ "rewards/rejected": -3.4153542518615723,
1199
+ "step": 760
1200
+ },
1201
+ {
1202
+ "epoch": 0.87,
1203
+ "learning_rate": 2.4574171274456433e-08,
1204
+ "logits/chosen": -1.9380009174346924,
1205
+ "logits/rejected": -1.7994283437728882,
1206
+ "logps/chosen": -570.5930786132812,
1207
+ "logps/rejected": -562.5328369140625,
1208
+ "loss": 0.0668,
1209
+ "rewards/accuracies": 0.7875000238418579,
1210
+ "rewards/chosen": -2.2663681507110596,
1211
+ "rewards/margins": 1.4617531299591064,
1212
+ "rewards/rejected": -3.728121519088745,
1213
+ "step": 770
1214
+ },
1215
+ {
1216
+ "epoch": 0.88,
1217
+ "learning_rate": 2.047500453267881e-08,
1218
+ "logits/chosen": -1.9811604022979736,
1219
+ "logits/rejected": -1.8174915313720703,
1220
+ "logps/chosen": -585.8560180664062,
1221
+ "logps/rejected": -575.7230834960938,
1222
+ "loss": 0.0723,
1223
+ "rewards/accuracies": 0.78125,
1224
+ "rewards/chosen": -2.2959883213043213,
1225
+ "rewards/margins": 1.562656283378601,
1226
+ "rewards/rejected": -3.858644485473633,
1227
+ "step": 780
1228
+ },
1229
+ {
1230
+ "epoch": 0.89,
1231
+ "learning_rate": 1.673511698609292e-08,
1232
+ "logits/chosen": -1.9835258722305298,
1233
+ "logits/rejected": -1.8875339031219482,
1234
+ "logps/chosen": -577.6177978515625,
1235
+ "logps/rejected": -575.160400390625,
1236
+ "loss": 0.074,
1237
+ "rewards/accuracies": 0.793749988079071,
1238
+ "rewards/chosen": -2.322665214538574,
1239
+ "rewards/margins": 1.4801914691925049,
1240
+ "rewards/rejected": -3.8028564453125,
1241
+ "step": 790
1242
+ },
1243
+ {
1244
+ "epoch": 0.91,
1245
+ "learning_rate": 1.3360362741285769e-08,
1246
+ "logits/chosen": -1.877092719078064,
1247
+ "logits/rejected": -1.8041622638702393,
1248
+ "logps/chosen": -503.6748962402344,
1249
+ "logps/rejected": -512.8577270507812,
1250
+ "loss": 0.0663,
1251
+ "rewards/accuracies": 0.731249988079071,
1252
+ "rewards/chosen": -2.3050992488861084,
1253
+ "rewards/margins": 1.193604826927185,
1254
+ "rewards/rejected": -3.498703718185425,
1255
+ "step": 800
1256
+ },
1257
+ {
1258
+ "epoch": 0.91,
1259
+ "eval_logits/chosen": -1.9548935890197754,
1260
+ "eval_logits/rejected": -1.901588797569275,
1261
+ "eval_logps/chosen": -527.3272705078125,
1262
+ "eval_logps/rejected": -619.4178466796875,
1263
+ "eval_loss": 0.0869935154914856,
1264
+ "eval_rewards/accuracies": 0.74609375,
1265
+ "eval_rewards/chosen": -2.702876567840576,
1266
+ "eval_rewards/margins": 0.9177693128585815,
1267
+ "eval_rewards/rejected": -3.620645761489868,
1268
+ "eval_runtime": 53.3906,
1269
+ "eval_samples_per_second": 37.46,
1270
+ "eval_steps_per_second": 0.599,
1271
+ "step": 800
1272
+ },
1273
+ {
1274
+ "epoch": 0.92,
1275
+ "learning_rate": 1.0356024355769433e-08,
1276
+ "logits/chosen": -1.9531776905059814,
1277
+ "logits/rejected": -1.8769395351409912,
1278
+ "logps/chosen": -578.2391357421875,
1279
+ "logps/rejected": -570.4874267578125,
1280
+ "loss": 0.0724,
1281
+ "rewards/accuracies": 0.762499988079071,
1282
+ "rewards/chosen": -2.243154287338257,
1283
+ "rewards/margins": 1.2938239574432373,
1284
+ "rewards/rejected": -3.5369784832000732,
1285
+ "step": 810
1286
+ },
1287
+ {
1288
+ "epoch": 0.93,
1289
+ "learning_rate": 7.726804569108597e-09,
1290
+ "logits/chosen": -1.908215880393982,
1291
+ "logits/rejected": -1.777242660522461,
1292
+ "logps/chosen": -549.6116333007812,
1293
+ "logps/rejected": -539.8973999023438,
1294
+ "loss": 0.0635,
1295
+ "rewards/accuracies": 0.768750011920929,
1296
+ "rewards/chosen": -2.2482357025146484,
1297
+ "rewards/margins": 1.3119279146194458,
1298
+ "rewards/rejected": -3.5601630210876465,
1299
+ "step": 820
1300
+ },
1301
+ {
1302
+ "epoch": 0.94,
1303
+ "learning_rate": 5.476818941645561e-09,
1304
+ "logits/chosen": -1.9380836486816406,
1305
+ "logits/rejected": -1.8248558044433594,
1306
+ "logps/chosen": -566.2852172851562,
1307
+ "logps/rejected": -571.448974609375,
1308
+ "loss": 0.0626,
1309
+ "rewards/accuracies": 0.768750011920929,
1310
+ "rewards/chosen": -2.232658863067627,
1311
+ "rewards/margins": 1.5336438417434692,
1312
+ "rewards/rejected": -3.7663028240203857,
1313
+ "step": 830
1314
+ },
1315
+ {
1316
+ "epoch": 0.95,
1317
+ "learning_rate": 3.609589412347347e-09,
1318
+ "logits/chosen": -1.99057137966156,
1319
+ "logits/rejected": -1.887397050857544,
1320
+ "logps/chosen": -581.9739990234375,
1321
+ "logps/rejected": -585.4699096679688,
1322
+ "loss": 0.0769,
1323
+ "rewards/accuracies": 0.8125,
1324
+ "rewards/chosen": -2.106590747833252,
1325
+ "rewards/margins": 1.779883623123169,
1326
+ "rewards/rejected": -3.886474132537842,
1327
+ "step": 840
1328
+ },
1329
+ {
1330
+ "epoch": 0.96,
1331
+ "learning_rate": 2.1280387858572667e-09,
1332
+ "logits/chosen": -1.925265908241272,
1333
+ "logits/rejected": -1.8459047079086304,
1334
+ "logps/chosen": -552.0723876953125,
1335
+ "logps/rejected": -599.8142700195312,
1336
+ "loss": 0.0739,
1337
+ "rewards/accuracies": 0.8187500238418579,
1338
+ "rewards/chosen": -2.3117988109588623,
1339
+ "rewards/margins": 1.3644450902938843,
1340
+ "rewards/rejected": -3.676244020462036,
1341
+ "step": 850
1342
+ },
1343
+ {
1344
+ "epoch": 0.97,
1345
+ "learning_rate": 1.03448615738172e-09,
1346
+ "logits/chosen": -1.9360536336898804,
1347
+ "logits/rejected": -1.8282134532928467,
1348
+ "logps/chosen": -535.2977294921875,
1349
+ "logps/rejected": -555.2935791015625,
1350
+ "loss": 0.0656,
1351
+ "rewards/accuracies": 0.8187500238418579,
1352
+ "rewards/chosen": -2.1976685523986816,
1353
+ "rewards/margins": 1.5509238243103027,
1354
+ "rewards/rejected": -3.748591899871826,
1355
+ "step": 860
1356
+ },
1357
+ {
1358
+ "epoch": 0.98,
1359
+ "learning_rate": 3.3064328257259575e-10,
1360
+ "logits/chosen": -1.9918015003204346,
1361
+ "logits/rejected": -1.8840529918670654,
1362
+ "logps/chosen": -587.255615234375,
1363
+ "logps/rejected": -566.5502319335938,
1364
+ "loss": 0.0679,
1365
+ "rewards/accuracies": 0.768750011920929,
1366
+ "rewards/chosen": -2.329481840133667,
1367
+ "rewards/margins": 1.393025279045105,
1368
+ "rewards/rejected": -3.7225069999694824,
1369
+ "step": 870
1370
+ },
1371
+ {
1372
+ "epoch": 1.0,
1373
+ "learning_rate": 1.7611898088715216e-11,
1374
+ "logits/chosen": -2.004169225692749,
1375
+ "logits/rejected": -1.8871746063232422,
1376
+ "logps/chosen": -655.3201293945312,
1377
+ "logps/rejected": -616.4334106445312,
1378
+ "loss": 0.0723,
1379
+ "rewards/accuracies": 0.800000011920929,
1380
+ "rewards/chosen": -2.3969874382019043,
1381
+ "rewards/margins": 1.6344892978668213,
1382
+ "rewards/rejected": -4.031477451324463,
1383
+ "step": 880
1384
+ },
1385
  {
1386
  "epoch": 1.0,
1387
+ "step": 883,
1388
  "total_flos": 0.0,
1389
+ "train_loss": 0.11349766382510908,
1390
+ "train_runtime": 8005.048,
1391
+ "train_samples_per_second": 14.12,
1392
+ "train_steps_per_second": 0.11
1393
  }
1394
  ],
1395
  "logging_steps": 10,
1396
+ "max_steps": 883,
1397
  "num_train_epochs": 1,
1398
+ "save_steps": 100,
1399
  "total_flos": 0.0,
1400
  "trial_name": null,
1401
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d05f2ea4a8f27ac4989592d034e456f8fe99958c58d076bd3ccb965c582e16a
3
  size 5944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34facb59b2833ff9a65d1ea6ca0671f7143189081be77d079ad67a7343d5aa7d
3
  size 5944