RikkiXu committed
Commit 9f518c0
1 Parent(s): d58bba0

Model save

README.md CHANGED
@@ -14,16 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
14
  # zephyr-7b-dpo-full
15
 
16
  This model was trained from scratch on the None dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 0.6931
19
- - Rewards/chosen: -8.4881
20
- - Rewards/rejected: -8.4881
21
- - Rewards/accuracies: 0.0
22
- - Rewards/margins: 0.0
23
- - Logps/rejected: -164.0651
24
- - Logps/chosen: -164.0651
25
- - Logits/rejected: -3.2224
26
- - Logits/chosen: -3.2224
27
 
28
  ## Model description
29
 
@@ -48,8 +38,8 @@ The following hyperparameters were used during training:
48
  - seed: 42
49
  - distributed_type: multi-GPU
50
  - num_devices: 8
51
- - gradient_accumulation_steps: 2
52
- - total_train_batch_size: 128
53
  - total_eval_batch_size: 64
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: cosine
@@ -58,11 +48,6 @@ The following hyperparameters were used during training:
58
 
59
  ### Training results
60
 
61
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.7723 | 0.29 | 100 | 0.6931 | -2.0247 | -2.0247 | 0.0 | 0.0 | -157.6017 | -157.6017 | -3.1472 | -3.1472 |
64
- | 0.7717 | 0.57 | 200 | 0.6931 | -12.0830 | -12.0830 | 0.0 | 0.0 | -167.6601 | -167.6601 | -3.1635 | -3.1635 |
65
- | 0.782 | 0.86 | 300 | 0.6931 | -8.4881 | -8.4881 | 0.0 | 0.0 | -164.0651 | -164.0651 | -3.2224 | -3.2224 |
66
 
67
 
68
  ### Framework versions
 
14
  # zephyr-7b-dpo-full
15
 
16
  This model was trained from scratch on the None dataset.
17
 
18
  ## Model description
19
 
 
38
  - seed: 42
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
+ - gradient_accumulation_steps: 4
42
+ - total_train_batch_size: 256
43
  - total_eval_batch_size: 64
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
 
48
 
49
  ### Training results
50
 
51
 
52
 
53
  ### Framework versions
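
The hyperparameter change above doubles gradient accumulation (2 → 4), which is what moves the effective batch size from 128 to 256. A minimal sketch of the usual Trainer arithmetic, assuming a per-device train batch size of 8 (that field is not shown in this hunk):

```python
# Effective batch size under the updated settings.
# per_device_train_batch_size is assumed to be 8; it does not appear in the diff above.
num_devices = 8
gradient_accumulation_steps = 4   # was 2 before this commit
per_device_train_batch_size = 8   # assumed

total_train_batch_size = (
    per_device_train_batch_size * num_devices * gradient_accumulation_steps
)
print(total_train_batch_size)  # 256 (previously 8 * 8 * 2 = 128)
```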
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.7428068714482444,
4
- "train_runtime": 5299.044,
5
- "train_samples": 44755,
6
- "train_samples_per_second": 8.446,
7
- "train_steps_per_second": 0.066
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.42979541909757746,
4
+ "train_runtime": 5368.3646,
5
+ "train_samples": 48530,
6
+ "train_samples_per_second": 9.04,
7
+ "train_steps_per_second": 0.035
8
  }
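
The updated throughput figures are consistent with the step count recorded in trainer_state.json further down (189 steps). A quick check, assuming the standard Trainer convention that the `*_per_second` fields are derived from `train_runtime`:

```python
# Sanity check of the reported throughput in all_results.json / train_results.json.
# global_step (189) is taken from the trainer_state.json diff below.
train_runtime = 5368.3646
train_samples = 48530
global_step = 189

print(round(train_samples / train_runtime, 2))  # 9.04  -> train_samples_per_second
print(round(global_step / train_runtime, 3))    # 0.035 -> train_steps_per_second
```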
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8a16dcdeac389a7edb934e74708a95a01cd1e632e8f90a73780fef9e4ba9a79
3
  size 4943178720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:356dc2b1355d662c141aef81f6cc89001c178e4007968f89b8978b8150436157
3
  size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:830c15b3f985c15ad7fd7fed750f2e2465dfc40ee9aa0da31053db52e903da60
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f1d3e3ba731f6817b54fbce899547aa3234b1ac6c106bb71917516260d9eb90
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9abe98aec4f8efa4737df5455a2ed808d0f8ebad162d17a1674f84f71648f8d
3
  size 4540532728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e0cc8151a7384be178ba6186d9b46fe0d7d5bcec31517e8e6d3b801f63aec5
3
  size 4540532728
runs/Jun05_11-04-00_n136-100-194/events.out.tfevents.1717556770.n136-100-194.336424.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18294c31a9b5580578ded35199e3f571e65821fa389760efdc6ab371cc64ec7f
3
- size 12302
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:134fc9b4f27841d895937a26473765b3c2b570f6f97d40ce4d813ecf9ed2916b
3
+ size 18136
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.7428068714482444,
4
- "train_runtime": 5299.044,
5
- "train_samples": 44755,
6
- "train_samples_per_second": 8.446,
7
- "train_steps_per_second": 0.066
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.42979541909757746,
4
+ "train_runtime": 5368.3646,
5
+ "train_samples": 48530,
6
+ "train_samples_per_second": 9.04,
7
+ "train_steps_per_second": 0.035
8
  }
trainer_state.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "eval_steps": 100,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "grad_norm": 3039.0846042522635,
14
- "learning_rate": 1.4285714285714284e-08,
15
- "logits/chosen": -4.185730934143066,
16
- "logits/rejected": -4.509836196899414,
17
- "logps/chosen": -274.000732421875,
18
- "logps/rejected": -205.8054962158203,
19
- "loss": 0.6932,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
@@ -24,590 +24,287 @@
24
  "step": 1
25
  },
26
  {
27
- "epoch": 0.03,
28
- "grad_norm": 3122.252138846549,
29
- "learning_rate": 1.4285714285714285e-07,
30
- "logits/chosen": -4.2117600440979,
31
- "logits/rejected": -4.4855546951293945,
32
- "logps/chosen": -318.3944396972656,
33
- "logps/rejected": -257.1120910644531,
34
- "loss": 0.7578,
35
- "rewards/accuracies": 0.4375,
36
- "rewards/chosen": 0.11668112874031067,
37
- "rewards/margins": -0.05277401953935623,
38
- "rewards/rejected": 0.1694551408290863,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.06,
43
- "grad_norm": 2683.55929188042,
44
- "learning_rate": 2.857142857142857e-07,
45
- "logits/chosen": -4.264363765716553,
46
- "logits/rejected": -4.5196099281311035,
47
- "logps/chosen": -303.1786193847656,
48
- "logps/rejected": -243.7255096435547,
49
- "loss": 0.5287,
50
- "rewards/accuracies": 0.7749999761581421,
51
- "rewards/chosen": 2.058025598526001,
52
- "rewards/margins": 1.5574162006378174,
53
- "rewards/rejected": 0.5006095767021179,
54
  "step": 20
55
  },
56
  {
57
- "epoch": 0.09,
58
- "grad_norm": 1727.068886617789,
59
- "learning_rate": 4.285714285714285e-07,
60
- "logits/chosen": -4.307942867279053,
61
- "logits/rejected": -4.567526340484619,
62
- "logps/chosen": -299.24615478515625,
63
- "logps/rejected": -256.9350280761719,
64
- "loss": 0.4422,
65
- "rewards/accuracies": 0.831250011920929,
66
- "rewards/chosen": 7.268563747406006,
67
- "rewards/margins": 4.868700981140137,
68
- "rewards/rejected": 2.3998632431030273,
69
  "step": 30
70
  },
71
  {
72
- "epoch": 0.11,
73
- "grad_norm": 2206.909408534419,
74
- "learning_rate": 4.996892303047305e-07,
75
- "logits/chosen": -4.312300682067871,
76
- "logits/rejected": -4.578764915466309,
77
- "logps/chosen": -288.2650451660156,
78
- "logps/rejected": -235.3504638671875,
79
- "loss": 0.5344,
80
- "rewards/accuracies": 0.8062499761581421,
81
- "rewards/chosen": 7.26935338973999,
82
- "rewards/margins": 7.414456367492676,
83
- "rewards/rejected": -0.145103320479393,
84
  "step": 40
85
  },
86
  {
87
- "epoch": 0.14,
88
- "grad_norm": 1574.7884185445766,
89
- "learning_rate": 4.972077065562821e-07,
90
- "logits/chosen": -4.287051200866699,
91
- "logits/rejected": -4.532064914703369,
92
- "logps/chosen": -295.8678894042969,
93
- "logps/rejected": -256.9671325683594,
94
- "loss": 0.514,
95
- "rewards/accuracies": 0.8687499761581421,
96
- "rewards/chosen": 6.857720851898193,
97
- "rewards/margins": 10.064432144165039,
98
- "rewards/rejected": -3.206712007522583,
99
  "step": 50
100
  },
101
  {
102
- "epoch": 0.17,
103
- "grad_norm": 2086.3490202582466,
104
- "learning_rate": 4.922693215572695e-07,
105
- "logits/chosen": -4.274147033691406,
106
- "logits/rejected": -4.500526428222656,
107
- "logps/chosen": -304.9868469238281,
108
- "logps/rejected": -266.1625061035156,
109
- "loss": 0.5936,
110
- "rewards/accuracies": 0.84375,
111
- "rewards/chosen": 6.364766597747803,
112
- "rewards/margins": 9.915193557739258,
113
- "rewards/rejected": -3.550427198410034,
114
  "step": 60
115
  },
116
  {
117
- "epoch": 0.2,
118
- "grad_norm": 2489.785834029294,
119
- "learning_rate": 4.849231551964771e-07,
120
- "logits/chosen": -4.388774871826172,
121
- "logits/rejected": -4.523660659790039,
122
- "logps/chosen": -281.50799560546875,
123
- "logps/rejected": -240.20126342773438,
124
- "loss": 0.578,
125
- "rewards/accuracies": 0.84375,
126
- "rewards/chosen": 7.396153926849365,
127
- "rewards/margins": 10.37517261505127,
128
- "rewards/rejected": -2.9790191650390625,
129
  "step": 70
130
  },
131
  {
132
- "epoch": 0.23,
133
- "grad_norm": 1795.0951767218703,
134
- "learning_rate": 4.752422169756047e-07,
135
- "logits/chosen": -4.22324275970459,
136
- "logits/rejected": -4.482357025146484,
137
- "logps/chosen": -289.7461853027344,
138
- "logps/rejected": -244.2007293701172,
139
- "loss": 0.6974,
140
- "rewards/accuracies": 0.875,
141
- "rewards/chosen": 5.9415082931518555,
142
- "rewards/margins": 9.690756797790527,
143
- "rewards/rejected": -3.749248504638672,
144
  "step": 80
145
  },
146
  {
147
- "epoch": 0.26,
148
- "grad_norm": 1863.4883740900614,
149
- "learning_rate": 4.6332272040803887e-07,
150
- "logits/chosen": -4.147845268249512,
151
- "logits/rejected": -4.379548072814941,
152
- "logps/chosen": -301.5574645996094,
153
- "logps/rejected": -264.04815673828125,
154
- "loss": 0.6584,
155
- "rewards/accuracies": 0.8125,
156
- "rewards/chosen": 6.005797386169434,
157
- "rewards/margins": 10.730647087097168,
158
- "rewards/rejected": -4.724849700927734,
159
  "step": 90
160
  },
161
  {
162
- "epoch": 0.29,
163
- "grad_norm": 2150.0529520665987,
164
- "learning_rate": 4.492831268057306e-07,
165
- "logits/chosen": -4.204574108123779,
166
- "logits/rejected": -4.426244735717773,
167
- "logps/chosen": -287.6278076171875,
168
- "logps/rejected": -244.3843231201172,
169
- "loss": 0.7723,
170
- "rewards/accuracies": 0.824999988079071,
171
- "rewards/chosen": 8.200125694274902,
172
- "rewards/margins": 10.383246421813965,
173
- "rewards/rejected": -2.1831212043762207,
174
- "step": 100
175
- },
176
- {
177
- "epoch": 0.29,
178
- "eval_logits/chosen": -3.147157669067383,
179
- "eval_logits/rejected": -3.147157669067383,
180
- "eval_logps/chosen": -157.6016845703125,
181
- "eval_logps/rejected": -157.6016845703125,
182
- "eval_loss": 0.6931471824645996,
183
- "eval_rewards/accuracies": 0.0,
184
- "eval_rewards/chosen": -2.0246658325195312,
185
- "eval_rewards/margins": 0.0,
186
- "eval_rewards/rejected": -2.0246658325195312,
187
- "eval_runtime": 1.5111,
188
- "eval_samples_per_second": 0.662,
189
- "eval_steps_per_second": 0.662,
190
  "step": 100
191
  },
192
  {
193
- "epoch": 0.31,
194
- "grad_norm": 2069.7674123460392,
195
- "learning_rate": 4.332629679574565e-07,
196
- "logits/chosen": -4.301981449127197,
197
- "logits/rejected": -4.562737464904785,
198
- "logps/chosen": -298.0445861816406,
199
- "logps/rejected": -258.07379150390625,
200
- "loss": 0.9782,
201
- "rewards/accuracies": 0.84375,
202
- "rewards/chosen": 5.386460304260254,
203
- "rewards/margins": 12.155603408813477,
204
- "rewards/rejected": -6.769143581390381,
205
  "step": 110
206
  },
207
  {
208
- "epoch": 0.34,
209
- "grad_norm": 2493.1141420840436,
210
- "learning_rate": 4.154214593992149e-07,
211
- "logits/chosen": -4.2862958908081055,
212
- "logits/rejected": -4.542973518371582,
213
- "logps/chosen": -285.873046875,
214
- "logps/rejected": -243.8842010498047,
215
- "loss": 0.9519,
216
- "rewards/accuracies": 0.8374999761581421,
217
- "rewards/chosen": 7.061153411865234,
218
- "rewards/margins": 11.827821731567383,
219
- "rewards/rejected": -4.766669273376465,
220
  "step": 120
221
  },
222
  {
223
- "epoch": 0.37,
224
- "grad_norm": 1751.0600688695415,
225
- "learning_rate": 3.959359180586975e-07,
226
- "logits/chosen": -4.280123710632324,
227
- "logits/rejected": -4.522739887237549,
228
- "logps/chosen": -277.5672302246094,
229
- "logps/rejected": -226.9970703125,
230
- "loss": 0.7208,
231
- "rewards/accuracies": 0.875,
232
- "rewards/chosen": 5.132817268371582,
233
- "rewards/margins": 12.189440727233887,
234
- "rewards/rejected": -7.056623935699463,
235
  "step": 130
236
  },
237
  {
238
- "epoch": 0.4,
239
- "grad_norm": 1830.8828386105183,
240
- "learning_rate": 3.75e-07,
241
- "logits/chosen": -4.162067413330078,
242
- "logits/rejected": -4.473877429962158,
243
- "logps/chosen": -294.2828369140625,
244
- "logps/rejected": -248.1217041015625,
245
- "loss": 0.8272,
246
- "rewards/accuracies": 0.887499988079071,
247
- "rewards/chosen": 5.600853443145752,
248
- "rewards/margins": 12.919031143188477,
249
- "rewards/rejected": -7.31817626953125,
250
  "step": 140
251
  },
252
  {
253
- "epoch": 0.43,
254
- "grad_norm": 1975.8832437780866,
255
- "learning_rate": 3.528217757826529e-07,
256
- "logits/chosen": -4.179337978363037,
257
- "logits/rejected": -4.442940711975098,
258
- "logps/chosen": -295.30914306640625,
259
- "logps/rejected": -259.9874267578125,
260
- "loss": 0.8642,
261
- "rewards/accuracies": 0.7749999761581421,
262
- "rewards/chosen": 4.101029872894287,
263
- "rewards/margins": 12.387203216552734,
264
- "rewards/rejected": -8.286172866821289,
265
  "step": 150
266
  },
267
  {
268
- "epoch": 0.46,
269
- "grad_norm": 1974.3387943820865,
270
- "learning_rate": 3.296216625629211e-07,
271
- "logits/chosen": -4.1478681564331055,
272
- "logits/rejected": -4.431545257568359,
273
- "logps/chosen": -294.5439758300781,
274
- "logps/rejected": -244.33657836914062,
275
- "loss": 0.9334,
276
- "rewards/accuracies": 0.8812500238418579,
277
- "rewards/chosen": 6.383957386016846,
278
- "rewards/margins": 13.940587997436523,
279
- "rewards/rejected": -7.556630611419678,
280
  "step": 160
281
  },
282
  {
283
- "epoch": 0.49,
284
- "grad_norm": 1732.7845040237094,
285
- "learning_rate": 3.056302334890786e-07,
286
- "logits/chosen": -4.18727970123291,
287
- "logits/rejected": -4.425799369812012,
288
- "logps/chosen": -294.9515075683594,
289
- "logps/rejected": -250.8203125,
290
- "loss": 0.8554,
291
- "rewards/accuracies": 0.84375,
292
- "rewards/chosen": 5.709182262420654,
293
- "rewards/margins": 14.079116821289062,
294
- "rewards/rejected": -8.369935989379883,
295
  "step": 170
296
  },
297
  {
298
- "epoch": 0.51,
299
- "grad_norm": 2121.8411757635613,
300
- "learning_rate": 2.810859261618713e-07,
301
- "logits/chosen": -4.313704490661621,
302
- "logits/rejected": -4.544769287109375,
303
- "logps/chosen": -282.9936828613281,
304
- "logps/rejected": -250.189453125,
305
- "loss": 0.7998,
306
- "rewards/accuracies": 0.824999988079071,
307
- "rewards/chosen": 6.883467674255371,
308
- "rewards/margins": 14.591836929321289,
309
- "rewards/rejected": -7.708369255065918,
310
  "step": 180
311
  },
312
- {
313
- "epoch": 0.54,
314
- "grad_norm": 1756.6999736537189,
315
- "learning_rate": 2.5623267293451823e-07,
316
- "logits/chosen": -4.283580303192139,
317
- "logits/rejected": -4.457066535949707,
318
- "logps/chosen": -305.109375,
319
- "logps/rejected": -265.3091125488281,
320
- "loss": 0.6833,
321
- "rewards/accuracies": 0.887499988079071,
322
- "rewards/chosen": 7.185091495513916,
323
- "rewards/margins": 13.808235168457031,
324
- "rewards/rejected": -6.623143196105957,
325
- "step": 190
326
- },
327
- {
328
- "epoch": 0.57,
329
- "grad_norm": 1748.1359792805438,
330
- "learning_rate": 2.3131747660339394e-07,
331
- "logits/chosen": -4.295716285705566,
332
- "logits/rejected": -4.545838832855225,
333
- "logps/chosen": -289.03521728515625,
334
- "logps/rejected": -257.3003234863281,
335
- "loss": 0.7717,
336
- "rewards/accuracies": 0.856249988079071,
337
- "rewards/chosen": 8.096379280090332,
338
- "rewards/margins": 14.581645011901855,
339
- "rewards/rejected": -6.485265254974365,
340
- "step": 200
341
- },
342
- {
343
- "epoch": 0.57,
344
- "eval_logits/chosen": -3.163522243499756,
345
- "eval_logits/rejected": -3.163522243499756,
346
- "eval_logps/chosen": -167.66006469726562,
347
- "eval_logps/rejected": -167.66006469726562,
348
- "eval_loss": 0.6931471824645996,
349
- "eval_rewards/accuracies": 0.0,
350
- "eval_rewards/chosen": -12.083049774169922,
351
- "eval_rewards/margins": 0.0,
352
- "eval_rewards/rejected": -12.083049774169922,
353
- "eval_runtime": 1.4711,
354
- "eval_samples_per_second": 0.68,
355
- "eval_steps_per_second": 0.68,
356
- "step": 200
357
- },
358
- {
359
- "epoch": 0.6,
360
- "grad_norm": 1829.602720191424,
361
- "learning_rate": 2.065879555832674e-07,
362
- "logits/chosen": -4.250877857208252,
363
- "logits/rejected": -4.542287826538086,
364
- "logps/chosen": -303.2696838378906,
365
- "logps/rejected": -262.7691650390625,
366
- "loss": 0.8229,
367
- "rewards/accuracies": 0.8687499761581421,
368
- "rewards/chosen": 7.95089864730835,
369
- "rewards/margins": 15.162821769714355,
370
- "rewards/rejected": -7.211922645568848,
371
- "step": 210
372
- },
373
- {
374
- "epoch": 0.63,
375
- "grad_norm": 1788.0044780541311,
376
- "learning_rate": 1.8228988296424875e-07,
377
- "logits/chosen": -4.257784366607666,
378
- "logits/rejected": -4.608870983123779,
379
- "logps/chosen": -293.54071044921875,
380
- "logps/rejected": -233.70166015625,
381
- "loss": 0.7993,
382
- "rewards/accuracies": 0.8812500238418579,
383
- "rewards/chosen": 7.973156929016113,
384
- "rewards/margins": 16.081945419311523,
385
- "rewards/rejected": -8.108789443969727,
386
- "step": 220
387
- },
388
- {
389
- "epoch": 0.66,
390
- "grad_norm": 1900.3878505004946,
391
- "learning_rate": 1.5866474390840124e-07,
392
- "logits/chosen": -4.334306716918945,
393
- "logits/rejected": -4.525221824645996,
394
- "logps/chosen": -278.41827392578125,
395
- "logps/rejected": -233.2617645263672,
396
- "loss": 0.6293,
397
- "rewards/accuracies": 0.862500011920929,
398
- "rewards/chosen": 6.539956569671631,
399
- "rewards/margins": 12.842289924621582,
400
- "rewards/rejected": -6.30233097076416,
401
- "step": 230
402
- },
403
- {
404
- "epoch": 0.69,
405
- "grad_norm": 1759.0935765619065,
406
- "learning_rate": 1.3594733566170925e-07,
407
- "logits/chosen": -4.360232353210449,
408
- "logits/rejected": -4.688153266906738,
409
- "logps/chosen": -255.55807495117188,
410
- "logps/rejected": -222.76504516601562,
411
- "loss": 1.0017,
412
- "rewards/accuracies": 0.831250011920929,
413
- "rewards/chosen": 6.956341743469238,
414
- "rewards/margins": 13.633699417114258,
415
- "rewards/rejected": -6.677358150482178,
416
- "step": 240
417
- },
418
- {
419
- "epoch": 0.71,
420
- "grad_norm": 1730.0292892661773,
421
- "learning_rate": 1.1436343403356016e-07,
422
- "logits/chosen": -4.352273941040039,
423
- "logits/rejected": -4.6365180015563965,
424
- "logps/chosen": -276.07659912109375,
425
- "logps/rejected": -241.01425170898438,
426
- "loss": 0.8087,
427
- "rewards/accuracies": 0.8687499761581421,
428
- "rewards/chosen": 6.498913764953613,
429
- "rewards/margins": 14.106257438659668,
430
- "rewards/rejected": -7.6073455810546875,
431
- "step": 250
432
- },
433
- {
434
- "epoch": 0.74,
435
- "grad_norm": 1731.9100006560693,
436
- "learning_rate": 9.412754953531663e-08,
437
- "logits/chosen": -4.388053894042969,
438
- "logits/rejected": -4.64432954788208,
439
- "logps/chosen": -270.12225341796875,
440
- "logps/rejected": -249.11416625976562,
441
- "loss": 0.6771,
442
- "rewards/accuracies": 0.8687499761581421,
443
- "rewards/chosen": 5.336110591888428,
444
- "rewards/margins": 13.311798095703125,
445
- "rewards/rejected": -7.975686073303223,
446
- "step": 260
447
- },
448
- {
449
- "epoch": 0.77,
450
- "grad_norm": 1559.894108247427,
451
- "learning_rate": 7.544079547848181e-08,
452
- "logits/chosen": -4.511970520019531,
453
- "logits/rejected": -4.677350044250488,
454
- "logps/chosen": -272.5389709472656,
455
- "logps/rejected": -237.0705108642578,
456
- "loss": 0.816,
457
- "rewards/accuracies": 0.831250011920929,
458
- "rewards/chosen": 4.5388288497924805,
459
- "rewards/margins": 12.382614135742188,
460
- "rewards/rejected": -7.843785762786865,
461
- "step": 270
462
- },
463
- {
464
- "epoch": 0.8,
465
- "grad_norm": 2131.975353118901,
466
- "learning_rate": 5.848888922025552e-08,
467
- "logits/chosen": -4.293630123138428,
468
- "logits/rejected": -4.587409496307373,
469
- "logps/chosen": -272.18914794921875,
470
- "logps/rejected": -237.04507446289062,
471
- "loss": 0.7076,
472
- "rewards/accuracies": 0.8500000238418579,
473
- "rewards/chosen": 6.89593505859375,
474
- "rewards/margins": 13.17906379699707,
475
- "rewards/rejected": -6.283128261566162,
476
- "step": 280
477
- },
478
- {
479
- "epoch": 0.83,
480
- "grad_norm": 2073.5868352302195,
481
- "learning_rate": 4.3440306421001324e-08,
482
- "logits/chosen": -4.306157112121582,
483
- "logits/rejected": -4.501837253570557,
484
- "logps/chosen": -279.41571044921875,
485
- "logps/rejected": -245.3014678955078,
486
- "loss": 0.7257,
487
- "rewards/accuracies": 0.918749988079071,
488
- "rewards/chosen": 7.150078773498535,
489
- "rewards/margins": 15.104804992675781,
490
- "rewards/rejected": -7.954724311828613,
491
- "step": 290
492
- },
493
- {
494
- "epoch": 0.86,
495
- "grad_norm": 1664.7949029760775,
496
- "learning_rate": 3.044460665744283e-08,
497
- "logits/chosen": -4.291565418243408,
498
- "logits/rejected": -4.546942234039307,
499
- "logps/chosen": -294.171142578125,
500
- "logps/rejected": -248.09219360351562,
501
- "loss": 0.782,
502
- "rewards/accuracies": 0.875,
503
- "rewards/chosen": 8.660406112670898,
504
- "rewards/margins": 15.387075424194336,
505
- "rewards/rejected": -6.726672172546387,
506
- "step": 300
507
- },
508
- {
509
- "epoch": 0.86,
510
- "eval_logits/chosen": -3.222372531890869,
511
- "eval_logits/rejected": -3.222372531890869,
512
- "eval_logps/chosen": -164.06509399414062,
513
- "eval_logps/rejected": -164.06509399414062,
514
- "eval_loss": 0.6931471824645996,
515
- "eval_rewards/accuracies": 0.0,
516
- "eval_rewards/chosen": -8.488082885742188,
517
- "eval_rewards/margins": 0.0,
518
- "eval_rewards/rejected": -8.488082885742188,
519
- "eval_runtime": 1.4741,
520
- "eval_samples_per_second": 0.678,
521
- "eval_steps_per_second": 0.678,
522
- "step": 300
523
- },
524
- {
525
- "epoch": 0.89,
526
- "grad_norm": 2673.859330983399,
527
- "learning_rate": 1.9630947032398066e-08,
528
- "logits/chosen": -4.44521427154541,
529
- "logits/rejected": -4.5893964767456055,
530
- "logps/chosen": -260.0516662597656,
531
- "logps/rejected": -234.23690795898438,
532
- "loss": 0.7257,
533
- "rewards/accuracies": 0.856249988079071,
534
- "rewards/chosen": 6.064515113830566,
535
- "rewards/margins": 13.008413314819336,
536
- "rewards/rejected": -6.943899631500244,
537
- "step": 310
538
- },
539
- {
540
- "epoch": 0.91,
541
- "grad_norm": 1691.3211570500287,
542
- "learning_rate": 1.1106798553464802e-08,
543
- "logits/chosen": -4.273613929748535,
544
- "logits/rejected": -4.540968894958496,
545
- "logps/chosen": -289.5185546875,
546
- "logps/rejected": -251.7852325439453,
547
- "loss": 0.6936,
548
- "rewards/accuracies": 0.887499988079071,
549
- "rewards/chosen": 7.365248203277588,
550
- "rewards/margins": 14.206552505493164,
551
- "rewards/rejected": -6.84130334854126,
552
- "step": 320
553
- },
554
- {
555
- "epoch": 0.94,
556
- "grad_norm": 2384.133770310185,
557
- "learning_rate": 4.956878037864043e-09,
558
- "logits/chosen": -4.335446834564209,
559
- "logits/rejected": -4.543330669403076,
560
- "logps/chosen": -303.6029968261719,
561
- "logps/rejected": -260.98760986328125,
562
- "loss": 0.7573,
563
- "rewards/accuracies": 0.856249988079071,
564
- "rewards/chosen": 5.982678413391113,
565
- "rewards/margins": 13.970565795898438,
566
- "rewards/rejected": -7.987887382507324,
567
- "step": 330
568
- },
569
- {
570
- "epoch": 0.97,
571
- "grad_norm": 2211.192973189294,
572
- "learning_rate": 1.2423061586496476e-09,
573
- "logits/chosen": -4.319240093231201,
574
- "logits/rejected": -4.623973846435547,
575
- "logps/chosen": -283.38006591796875,
576
- "logps/rejected": -234.4169921875,
577
- "loss": 0.8286,
578
- "rewards/accuracies": 0.8687499761581421,
579
- "rewards/chosen": 5.565188407897949,
580
- "rewards/margins": 11.47750186920166,
581
- "rewards/rejected": -5.912313938140869,
582
- "step": 340
583
- },
584
- {
585
- "epoch": 1.0,
586
- "grad_norm": 1586.2152634156816,
587
- "learning_rate": 0.0,
588
- "logits/chosen": -4.285008430480957,
589
- "logits/rejected": -4.503040790557861,
590
- "logps/chosen": -289.0204162597656,
591
- "logps/rejected": -251.9452362060547,
592
- "loss": 0.7666,
593
- "rewards/accuracies": 0.875,
594
- "rewards/chosen": 7.000736236572266,
595
- "rewards/margins": 13.714078903198242,
596
- "rewards/rejected": -6.713343143463135,
597
- "step": 350
598
- },
599
  {
600
  "epoch": 1.0,
601
- "step": 350,
602
  "total_flos": 0.0,
603
- "train_loss": 0.7428068714482444,
604
- "train_runtime": 5299.044,
605
- "train_samples_per_second": 8.446,
606
- "train_steps_per_second": 0.066
607
  }
608
  ],
609
  "logging_steps": 10,
610
- "max_steps": 350,
611
  "num_input_tokens_seen": 0,
612
  "num_train_epochs": 1,
613
  "save_steps": 100,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9960474308300395,
5
+ "eval_steps": 500,
6
+ "global_step": 189,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01,
13
+ "grad_norm": 26.908694644642612,
14
+ "learning_rate": 2.6315789473684208e-08,
15
+ "logits/chosen": -4.638427734375,
16
+ "logits/rejected": -4.891327857971191,
17
+ "logps/chosen": -198.52749633789062,
18
+ "logps/rejected": -147.3392791748047,
19
+ "loss": 0.6929,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
 
24
  "step": 1
25
  },
26
  {
27
+ "epoch": 0.05,
28
+ "grad_norm": 27.07664091604203,
29
+ "learning_rate": 2.631578947368421e-07,
30
+ "logits/chosen": -4.496448993682861,
31
+ "logits/rejected": -4.815927028656006,
32
+ "logps/chosen": -224.28125,
33
+ "logps/rejected": -167.94735717773438,
34
+ "loss": 0.6915,
35
+ "rewards/accuracies": 0.5034722089767456,
36
+ "rewards/chosen": 0.0034646072890609503,
37
+ "rewards/margins": 0.002639756305143237,
38
+ "rewards/rejected": 0.0008248506928794086,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.11,
43
+ "grad_norm": 26.30402064096193,
44
+ "learning_rate": 4.999573126145131e-07,
45
+ "logits/chosen": -4.625959873199463,
46
+ "logits/rejected": -4.94482421875,
47
+ "logps/chosen": -231.04525756835938,
48
+ "logps/rejected": -196.3661651611328,
49
+ "loss": 0.6465,
50
+ "rewards/accuracies": 0.8125,
51
+ "rewards/chosen": -0.04837086424231529,
52
+ "rewards/margins": 0.11359457671642303,
53
+ "rewards/rejected": -0.16196544468402863,
54
  "step": 20
55
  },
56
  {
57
+ "epoch": 0.16,
58
+ "grad_norm": 25.70165553073792,
59
+ "learning_rate": 4.948524419003415e-07,
60
+ "logits/chosen": -4.869608402252197,
61
+ "logits/rejected": -5.148451805114746,
62
+ "logps/chosen": -273.7060241699219,
63
+ "logps/rejected": -259.2108154296875,
64
+ "loss": 0.5717,
65
+ "rewards/accuracies": 0.778124988079071,
66
+ "rewards/chosen": -0.5196550488471985,
67
+ "rewards/margins": 0.3445150554180145,
68
+ "rewards/rejected": -0.8641700744628906,
69
  "step": 30
70
  },
71
  {
72
+ "epoch": 0.21,
73
+ "grad_norm": 32.38040367732233,
74
+ "learning_rate": 4.81409414945389e-07,
75
+ "logits/chosen": -4.95624303817749,
76
+ "logits/rejected": -5.334275245666504,
77
+ "logps/chosen": -321.26739501953125,
78
+ "logps/rejected": -317.9222106933594,
79
+ "loss": 0.5311,
80
+ "rewards/accuracies": 0.793749988079071,
81
+ "rewards/chosen": -0.9283856153488159,
82
+ "rewards/margins": 0.48562851548194885,
83
+ "rewards/rejected": -1.4140141010284424,
84
  "step": 40
85
  },
86
  {
87
+ "epoch": 0.26,
88
+ "grad_norm": 27.107807886309228,
89
+ "learning_rate": 4.6008601790947314e-07,
90
+ "logits/chosen": -5.323241233825684,
91
+ "logits/rejected": -5.817015171051025,
92
+ "logps/chosen": -357.8787536621094,
93
+ "logps/rejected": -385.47576904296875,
94
+ "loss": 0.4831,
95
+ "rewards/accuracies": 0.8500000238418579,
96
+ "rewards/chosen": -1.411299467086792,
97
+ "rewards/margins": 0.7530988454818726,
98
+ "rewards/rejected": -2.164398193359375,
99
  "step": 50
100
  },
101
  {
102
+ "epoch": 0.32,
103
+ "grad_norm": 32.232061879934236,
104
+ "learning_rate": 4.3160839350405605e-07,
105
+ "logits/chosen": -5.831389904022217,
106
+ "logits/rejected": -6.2499542236328125,
107
+ "logps/chosen": -395.7707824707031,
108
+ "logps/rejected": -446.3265686035156,
109
+ "loss": 0.4294,
110
+ "rewards/accuracies": 0.778124988079071,
111
+ "rewards/chosen": -1.821434736251831,
112
+ "rewards/margins": 0.9432821273803711,
113
+ "rewards/rejected": -2.7647171020507812,
114
  "step": 60
115
  },
116
  {
117
+ "epoch": 0.37,
118
+ "grad_norm": 35.03072007251475,
119
+ "learning_rate": 3.9694631307311825e-07,
120
+ "logits/chosen": -6.090306758880615,
121
+ "logits/rejected": -6.541258335113525,
122
+ "logps/chosen": -430.2369689941406,
123
+ "logps/rejected": -496.2119140625,
124
+ "loss": 0.424,
125
+ "rewards/accuracies": 0.8187500238418579,
126
+ "rewards/chosen": -2.1488282680511475,
127
+ "rewards/margins": 1.0692826509475708,
128
+ "rewards/rejected": -3.218111038208008,
129
  "step": 70
130
  },
131
  {
132
+ "epoch": 0.42,
133
+ "grad_norm": 30.667469826354093,
134
+ "learning_rate": 3.572801521931522e-07,
135
+ "logits/chosen": -6.3887619972229,
136
+ "logits/rejected": -6.877404689788818,
137
+ "logps/chosen": -439.2911071777344,
138
+ "logps/rejected": -526.5487060546875,
139
+ "loss": 0.4001,
140
+ "rewards/accuracies": 0.815625011920929,
141
+ "rewards/chosen": -2.3115358352661133,
142
+ "rewards/margins": 1.2141239643096924,
143
+ "rewards/rejected": -3.5256600379943848,
144
  "step": 80
145
  },
146
  {
147
+ "epoch": 0.47,
148
+ "grad_norm": 37.36819911889553,
149
+ "learning_rate": 3.139606943986089e-07,
150
+ "logits/chosen": -6.5696258544921875,
151
+ "logits/rejected": -7.1035637855529785,
152
+ "logps/chosen": -458.3387756347656,
153
+ "logps/rejected": -556.1650390625,
154
+ "loss": 0.3875,
155
+ "rewards/accuracies": 0.7718750238418579,
156
+ "rewards/chosen": -2.5067451000213623,
157
+ "rewards/margins": 1.3494058847427368,
158
+ "rewards/rejected": -3.8561508655548096,
159
  "step": 90
160
  },
161
  {
162
+ "epoch": 0.53,
163
+ "grad_norm": 33.15053822353323,
164
+ "learning_rate": 2.684631318687185e-07,
165
+ "logits/chosen": -6.621747016906738,
166
+ "logits/rejected": -7.236710548400879,
167
+ "logps/chosen": -467.0467834472656,
168
+ "logps/rejected": -582.046142578125,
169
+ "loss": 0.3867,
170
+ "rewards/accuracies": 0.796875,
171
+ "rewards/chosen": -2.4837827682495117,
172
+ "rewards/margins": 1.5418504476547241,
173
+ "rewards/rejected": -4.025633811950684,
174
  "step": 100
175
  },
176
  {
177
+ "epoch": 0.58,
178
+ "grad_norm": 37.45830028947681,
179
+ "learning_rate": 2.2233682952712483e-07,
180
+ "logits/chosen": -6.568659782409668,
181
+ "logits/rejected": -7.284300327301025,
182
+ "logps/chosen": -460.4766540527344,
183
+ "logps/rejected": -578.6600341796875,
184
+ "loss": 0.3771,
185
+ "rewards/accuracies": 0.846875011920929,
186
+ "rewards/chosen": -2.3609726428985596,
187
+ "rewards/margins": 1.647943139076233,
188
+ "rewards/rejected": -4.008915901184082,
189
  "step": 110
190
  },
191
  {
192
+ "epoch": 0.63,
193
+ "grad_norm": 33.8427535333109,
194
+ "learning_rate": 1.7715256327766884e-07,
195
+ "logits/chosen": -6.796021461486816,
196
+ "logits/rejected": -7.497170925140381,
197
+ "logps/chosen": -504.50543212890625,
198
+ "logps/rejected": -621.22314453125,
199
+ "loss": 0.3508,
200
+ "rewards/accuracies": 0.815625011920929,
201
+ "rewards/chosen": -2.883434295654297,
202
+ "rewards/margins": 1.6248239278793335,
203
+ "rewards/rejected": -4.50825834274292,
204
  "step": 120
205
  },
206
  {
207
+ "epoch": 0.69,
208
+ "grad_norm": 35.353347844932394,
209
+ "learning_rate": 1.3444902911492174e-07,
210
+ "logits/chosen": -6.833544731140137,
211
+ "logits/rejected": -7.472651481628418,
212
+ "logps/chosen": -521.9656372070312,
213
+ "logps/rejected": -659.3110961914062,
214
+ "loss": 0.3705,
215
+ "rewards/accuracies": 0.840624988079071,
216
+ "rewards/chosen": -2.989759922027588,
217
+ "rewards/margins": 1.8119176626205444,
218
+ "rewards/rejected": -4.801677227020264,
219
  "step": 130
220
  },
221
  {
222
+ "epoch": 0.74,
223
+ "grad_norm": 32.6045025544378,
224
+ "learning_rate": 9.56804446775518e-08,
225
+ "logits/chosen": -6.738868713378906,
226
+ "logits/rejected": -7.498864650726318,
227
+ "logps/chosen": -470.77337646484375,
228
+ "logps/rejected": -584.4710083007812,
229
+ "loss": 0.3591,
230
+ "rewards/accuracies": 0.831250011920929,
231
+ "rewards/chosen": -2.558176040649414,
232
+ "rewards/margins": 1.6057535409927368,
233
+ "rewards/rejected": -4.163929462432861,
234
  "step": 140
235
  },
236
  {
237
+ "epoch": 0.79,
238
+ "grad_norm": 32.49183208247093,
239
+ "learning_rate": 6.216702761078166e-08,
240
+ "logits/chosen": -7.049106597900391,
241
+ "logits/rejected": -7.772597312927246,
242
+ "logps/chosen": -487.25726318359375,
243
+ "logps/rejected": -619.6534423828125,
244
+ "loss": 0.3576,
245
+ "rewards/accuracies": 0.8343750238418579,
246
+ "rewards/chosen": -2.82848858833313,
247
+ "rewards/margins": 1.7726719379425049,
248
+ "rewards/rejected": -4.601161003112793,
249
  "step": 150
250
  },
251
  {
252
+ "epoch": 0.84,
253
+ "grad_norm": 37.26747220029015,
254
+ "learning_rate": 3.5050037137906885e-08,
255
+ "logits/chosen": -6.9701337814331055,
256
+ "logits/rejected": -7.731366157531738,
257
+ "logps/chosen": -494.5716247558594,
258
+ "logps/rejected": -623.4630737304688,
259
+ "loss": 0.3502,
260
+ "rewards/accuracies": 0.84375,
261
+ "rewards/chosen": -2.750206470489502,
262
+ "rewards/margins": 1.7980148792266846,
263
+ "rewards/rejected": -4.548220634460449,
264
  "step": 160
265
  },
266
  {
267
+ "epoch": 0.9,
268
+ "grad_norm": 31.918546112926368,
269
+ "learning_rate": 1.5252909846235894e-08,
270
+ "logits/chosen": -7.007571220397949,
271
+ "logits/rejected": -7.6982011795043945,
272
+ "logps/chosen": -509.54388427734375,
273
+ "logps/rejected": -666.7489624023438,
274
+ "loss": 0.3631,
275
+ "rewards/accuracies": 0.890625,
276
+ "rewards/chosen": -2.9002063274383545,
277
+ "rewards/margins": 2.0059866905212402,
278
+ "rewards/rejected": -4.906193733215332,
279
  "step": 170
280
  },
281
  {
282
+ "epoch": 0.95,
283
+ "grad_norm": 29.32551345390984,
284
+ "learning_rate": 3.4498131616493565e-09,
285
+ "logits/chosen": -6.939836025238037,
286
+ "logits/rejected": -7.576680660247803,
287
+ "logps/chosen": -514.7128295898438,
288
+ "logps/rejected": -656.9924926757812,
289
+ "loss": 0.3518,
290
+ "rewards/accuracies": 0.8031250238418579,
291
+ "rewards/chosen": -2.932391881942749,
292
+ "rewards/margins": 1.7769733667373657,
293
+ "rewards/rejected": -4.709364891052246,
294
  "step": 180
295
  },
296
  {
297
  "epoch": 1.0,
298
+ "step": 189,
299
  "total_flos": 0.0,
300
+ "train_loss": 0.42979541909757746,
301
+ "train_runtime": 5368.3646,
302
+ "train_samples_per_second": 9.04,
303
+ "train_steps_per_second": 0.035
304
  }
305
  ],
306
  "logging_steps": 10,
307
+ "max_steps": 189,
308
  "num_input_tokens_seen": 0,
309
  "num_train_epochs": 1,
310
  "save_steps": 100,