wzhouad commited on
Commit
6e971d2
1 Parent(s): 0673efe

Model save

Browse files
README.md CHANGED
@@ -14,16 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
14
  # zephyr-7b-dpo-full
15
 
16
  This model was trained from scratch on the None dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 0.0415
19
- - Rewards/chosen: -1.1176
20
- - Rewards/rejected: -2.0114
21
- - Rewards/accuracies: 0.7070
22
- - Rewards/margins: 0.8938
23
- - Logps/rejected: -531.2747
24
- - Logps/chosen: -435.5875
25
- - Logits/rejected: 0.8196
26
- - Logits/chosen: 0.7291
27
 
28
  ## Model description
29
 
@@ -43,12 +33,12 @@ More information needed
43
 
44
  The following hyperparameters were used during training:
45
  - learning_rate: 3e-06
46
- - train_batch_size: 4
47
  - eval_batch_size: 8
48
- - seed: 5
49
  - distributed_type: multi-GPU
50
  - num_devices: 8
51
- - gradient_accumulation_steps: 4
52
  - total_train_batch_size: 128
53
  - total_eval_batch_size: 64
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -58,12 +48,6 @@ The following hyperparameters were used during training:
58
 
59
  ### Training results
60
 
61
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.066 | 0.21 | 100 | 0.0702 | -0.4714 | -1.0800 | 0.7266 | 0.6086 | -438.1371 | -370.9747 | 0.7687 | 0.6183 |
64
- | 0.0477 | 0.42 | 200 | 0.0505 | -1.0382 | -1.8566 | 0.7461 | 0.8184 | -515.7967 | -427.6501 | 0.5198 | 0.4181 |
65
- | 0.0313 | 0.63 | 300 | 0.0344 | -1.3029 | -2.2224 | 0.7227 | 0.9195 | -552.3698 | -454.1193 | 1.0434 | 0.9401 |
66
- | 0.0359 | 0.84 | 400 | 0.0415 | -1.1176 | -2.0114 | 0.7070 | 0.8938 | -531.2747 | -435.5875 | 0.8196 | 0.7291 |
67
 
68
 
69
  ### Framework versions
 
14
  # zephyr-7b-dpo-full
15
 
16
  This model was trained from scratch on the None dataset.
 
 
 
 
 
 
 
 
 
 
17
 
18
  ## Model description
19
 
 
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 3e-06
36
+ - train_batch_size: 2
37
  - eval_batch_size: 8
38
+ - seed: 1
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
+ - gradient_accumulation_steps: 8
42
  - total_train_batch_size: 128
43
  - total_eval_batch_size: 64
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
48
 
49
  ### Training results
50
 
 
 
 
 
 
 
51
 
52
 
53
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.055112330793584664,
4
- "train_runtime": 4571.3444,
5
- "train_samples": 61134,
6
- "train_samples_per_second": 13.373,
7
- "train_steps_per_second": 0.104
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.3547657697973117,
4
+ "train_runtime": 5270.9361,
5
+ "train_samples": 45548,
6
+ "train_samples_per_second": 8.641,
7
+ "train_steps_per_second": 0.067
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77c78f44ae927b8c5f876cba766716862c391ff327d777f630df2273dc608ad2
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e0dec32f242e5185356c9aabdb63b6361f0e76923db502922b96ae33954e21
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52890ec8e3b01c2a425c75a5fe8026fad3760550ffe4ecc542adabcb6547e556
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c71f08ad1d050aa51128e168230fbf7b08a12a7469048f453be868dc4b011c1
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c4e28b526b64115f67f1a7d9ceb1156546b14ddfbf6c799c751ac2c949af93b
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e7b9d8250962948a9f4a596ab9c8e0fba7ec3e5cbb4089122bb5bb2d64378b
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331daeef21c9b60a293872df524529661446efaf2f056cc336b124cce438e3cb
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213a31aa46d216f4a6147988c22f9d730650373e142b430f375ebcf8f54ab823
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.055112330793584664,
4
- "train_runtime": 4571.3444,
5
- "train_samples": 61134,
6
- "train_samples_per_second": 13.373,
7
- "train_steps_per_second": 0.104
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.3547657697973117,
4
+ "train_runtime": 5270.9361,
5
+ "train_samples": 45548,
6
+ "train_samples_per_second": 8.641,
7
+ "train_steps_per_second": 0.067
8
  }
trainer_state.json CHANGED
@@ -1,763 +1,517 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9984301412872841,
5
- "eval_steps": 100,
6
- "global_step": 477,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "learning_rate": 6.25e-08,
14
- "logits/chosen": 0.10802720487117767,
15
- "logits/rejected": 0.30745893716812134,
16
- "logps/chosen": -475.5745544433594,
17
- "logps/rejected": -317.21234130859375,
18
- "loss": 0.1378,
19
- "rewards/accuracies": 0.0,
20
- "rewards/chosen": 0.0,
21
- "rewards/margins": 0.0,
22
- "rewards/rejected": 0.0,
23
- "step": 1
24
- },
25
- {
26
- "epoch": 0.02,
27
- "learning_rate": 6.25e-07,
28
- "logits/chosen": 0.21480141580104828,
29
- "logits/rejected": 0.3137889802455902,
30
- "logps/chosen": -308.09619140625,
31
- "logps/rejected": -308.85736083984375,
32
- "loss": 0.1432,
33
- "rewards/accuracies": 0.4791666567325592,
34
- "rewards/chosen": -0.0008134886738844216,
35
- "rewards/margins": 0.0006454013055190444,
36
- "rewards/rejected": -0.001458889921195805,
37
  "step": 10
38
  },
39
  {
40
- "epoch": 0.04,
41
- "learning_rate": 1.25e-06,
42
- "logits/chosen": 0.249754399061203,
43
- "logits/rejected": 0.2825905978679657,
44
- "logps/chosen": -304.75286865234375,
45
- "logps/rejected": -317.61688232421875,
46
- "loss": 0.1418,
47
- "rewards/accuracies": 0.6625000238418579,
48
- "rewards/chosen": 0.0010095896432176232,
49
- "rewards/margins": 0.010475357994437218,
50
- "rewards/rejected": -0.009465768001973629,
51
  "step": 20
52
  },
53
  {
54
- "epoch": 0.06,
55
- "learning_rate": 1.875e-06,
56
- "logits/chosen": 0.24968624114990234,
57
- "logits/rejected": 0.2685222029685974,
58
- "logps/chosen": -366.27813720703125,
59
- "logps/rejected": -365.3521728515625,
60
- "loss": 0.1431,
61
- "rewards/accuracies": 0.6312500238418579,
62
- "rewards/chosen": 0.014242827892303467,
63
- "rewards/margins": 0.06069143861532211,
64
- "rewards/rejected": -0.046448610723018646,
65
  "step": 30
66
  },
67
  {
68
- "epoch": 0.08,
69
- "learning_rate": 2.5e-06,
70
- "logits/chosen": 0.5138859748840332,
71
- "logits/rejected": 0.6031057238578796,
72
- "logps/chosen": -333.85650634765625,
73
- "logps/rejected": -331.0009765625,
74
- "loss": 0.1181,
75
- "rewards/accuracies": 0.6187499761581421,
76
- "rewards/chosen": -0.17486190795898438,
77
- "rewards/margins": 0.1082921177148819,
78
- "rewards/rejected": -0.2831540107727051,
79
  "step": 40
80
  },
81
  {
82
- "epoch": 0.1,
83
- "learning_rate": 2.999839121261416e-06,
84
- "logits/chosen": 0.7348484992980957,
85
- "logits/rejected": 0.8855365514755249,
86
- "logps/chosen": -370.4933776855469,
87
- "logps/rejected": -411.83404541015625,
88
- "loss": 0.0741,
89
- "rewards/accuracies": 0.6875,
90
- "rewards/chosen": -0.4408305287361145,
91
- "rewards/margins": 0.4697234034538269,
92
- "rewards/rejected": -0.9105539321899414,
93
  "step": 50
94
  },
95
  {
96
- "epoch": 0.13,
97
- "learning_rate": 2.994211988057582e-06,
98
- "logits/chosen": 0.7168207764625549,
99
- "logits/rejected": 0.8200086355209351,
100
- "logps/chosen": -341.53277587890625,
101
- "logps/rejected": -380.68243408203125,
102
- "loss": 0.0819,
103
- "rewards/accuracies": 0.6937500238418579,
104
- "rewards/chosen": -0.4272558093070984,
105
- "rewards/margins": 0.4549214839935303,
106
- "rewards/rejected": -0.8821773529052734,
107
  "step": 60
108
  },
109
  {
110
- "epoch": 0.15,
111
- "learning_rate": 2.9805753939568693e-06,
112
- "logits/chosen": 0.5615164041519165,
113
- "logits/rejected": 0.7741672396659851,
114
- "logps/chosen": -347.7218017578125,
115
- "logps/rejected": -330.172607421875,
116
- "loss": 0.0929,
117
- "rewards/accuracies": 0.6625000238418579,
118
- "rewards/chosen": -0.3559855818748474,
119
- "rewards/margins": 0.2854944169521332,
120
- "rewards/rejected": -0.6414799690246582,
121
  "step": 70
122
  },
123
  {
124
- "epoch": 0.17,
125
- "learning_rate": 2.959002435526626e-06,
126
- "logits/chosen": 0.5198915004730225,
127
- "logits/rejected": 0.725387454032898,
128
- "logps/chosen": -389.0698547363281,
129
- "logps/rejected": -371.3795471191406,
130
- "loss": 0.0736,
131
- "rewards/accuracies": 0.6937500238418579,
132
- "rewards/chosen": -0.4846402108669281,
133
- "rewards/margins": 0.395100474357605,
134
- "rewards/rejected": -0.8797407150268555,
135
  "step": 80
136
  },
137
  {
138
- "epoch": 0.19,
139
- "learning_rate": 2.929608750821129e-06,
140
- "logits/chosen": 0.3736918568611145,
141
- "logits/rejected": 0.5658319592475891,
142
- "logps/chosen": -444.59234619140625,
143
- "logps/rejected": -464.6935119628906,
144
- "loss": 0.0491,
145
- "rewards/accuracies": 0.699999988079071,
146
- "rewards/chosen": -0.8689848184585571,
147
- "rewards/margins": 0.6035453081130981,
148
- "rewards/rejected": -1.4725301265716553,
149
  "step": 90
150
  },
151
  {
152
- "epoch": 0.21,
153
- "learning_rate": 2.892551899524109e-06,
154
- "logits/chosen": 0.3380030393600464,
155
- "logits/rejected": 0.443446546792984,
156
- "logps/chosen": -408.71551513671875,
157
- "logps/rejected": -431.513671875,
158
- "loss": 0.066,
159
- "rewards/accuracies": 0.643750011920929,
160
- "rewards/chosen": -0.8214343786239624,
161
- "rewards/margins": 0.3987075388431549,
162
- "rewards/rejected": -1.2201420068740845,
163
- "step": 100
164
- },
165
- {
166
- "epoch": 0.21,
167
- "eval_logits/chosen": 0.6183323860168457,
168
- "eval_logits/rejected": 0.7686768174171448,
169
- "eval_logps/chosen": -370.9747009277344,
170
- "eval_logps/rejected": -438.13714599609375,
171
- "eval_loss": 0.07016688585281372,
172
- "eval_rewards/accuracies": 0.7265625,
173
- "eval_rewards/chosen": -0.47144782543182373,
174
- "eval_rewards/margins": 0.6085766553878784,
175
- "eval_rewards/rejected": -1.0800243616104126,
176
- "eval_runtime": 74.3034,
177
- "eval_samples_per_second": 26.917,
178
- "eval_steps_per_second": 0.431,
179
  "step": 100
180
  },
181
  {
182
- "epoch": 0.23,
183
- "learning_rate": 2.848030518377739e-06,
184
- "logits/chosen": 0.48754867911338806,
185
- "logits/rejected": 0.6056569814682007,
186
- "logps/chosen": -394.04449462890625,
187
- "logps/rejected": -424.449951171875,
188
- "loss": 0.06,
189
- "rewards/accuracies": 0.6499999761581421,
190
- "rewards/chosen": -0.6658821105957031,
191
- "rewards/margins": 0.43674975633621216,
192
- "rewards/rejected": -1.1026318073272705,
193
  "step": 110
194
  },
195
  {
196
- "epoch": 0.25,
197
- "learning_rate": 2.7962832564252724e-06,
198
- "logits/chosen": 0.5436107516288757,
199
- "logits/rejected": 0.6737319231033325,
200
- "logps/chosen": -429.415283203125,
201
- "logps/rejected": -469.0088806152344,
202
- "loss": 0.0627,
203
- "rewards/accuracies": 0.6499999761581421,
204
- "rewards/chosen": -0.7700729370117188,
205
- "rewards/margins": 0.48356789350509644,
206
- "rewards/rejected": -1.2536407709121704,
207
  "step": 120
208
  },
209
  {
210
- "epoch": 0.27,
211
- "learning_rate": 2.7375874957747644e-06,
212
- "logits/chosen": 0.5728715062141418,
213
- "logits/rejected": 0.7463508248329163,
214
- "logps/chosen": -441.0868225097656,
215
- "logps/rejected": -454.98748779296875,
216
- "loss": 0.0621,
217
- "rewards/accuracies": 0.7124999761581421,
218
- "rewards/chosen": -0.8495699763298035,
219
- "rewards/margins": 0.5289269685745239,
220
- "rewards/rejected": -1.3784968852996826,
221
  "step": 130
222
  },
223
  {
224
- "epoch": 0.29,
225
- "learning_rate": 2.672257864741005e-06,
226
- "logits/chosen": 0.6253047585487366,
227
- "logits/rejected": 0.786455512046814,
228
- "logps/chosen": -433.4244079589844,
229
- "logps/rejected": -461.5254821777344,
230
- "loss": 0.0435,
231
- "rewards/accuracies": 0.706250011920929,
232
- "rewards/chosen": -1.083187460899353,
233
- "rewards/margins": 0.5303990840911865,
234
- "rewards/rejected": -1.61358642578125,
235
  "step": 140
236
  },
237
  {
238
- "epoch": 0.31,
239
- "learning_rate": 2.600644551335706e-06,
240
- "logits/chosen": 0.7765518426895142,
241
- "logits/rejected": 0.984174907207489,
242
- "logps/chosen": -419.31109619140625,
243
- "logps/rejected": -431.96795654296875,
244
- "loss": 0.0444,
245
- "rewards/accuracies": 0.637499988079071,
246
- "rewards/chosen": -1.0778591632843018,
247
- "rewards/margins": 0.4318017363548279,
248
- "rewards/rejected": -1.5096609592437744,
249
  "step": 150
250
  },
251
  {
252
- "epoch": 0.33,
253
- "learning_rate": 2.5231314261461732e-06,
254
- "logits/chosen": 0.513221025466919,
255
- "logits/rejected": 0.7459092140197754,
256
- "logps/chosen": -418.07421875,
257
- "logps/rejected": -463.25408935546875,
258
- "loss": 0.0586,
259
- "rewards/accuracies": 0.706250011920929,
260
- "rewards/chosen": -0.6770002245903015,
261
- "rewards/margins": 0.5035561323165894,
262
- "rewards/rejected": -1.1805565357208252,
263
  "step": 160
264
  },
265
  {
266
- "epoch": 0.36,
267
- "learning_rate": 2.440133984664454e-06,
268
- "logits/chosen": 0.5670315027236938,
269
- "logits/rejected": 0.8073333501815796,
270
- "logps/chosen": -390.5821228027344,
271
- "logps/rejected": -419.92626953125,
272
- "loss": 0.0562,
273
- "rewards/accuracies": 0.706250011920929,
274
- "rewards/chosen": -0.8130921125411987,
275
- "rewards/margins": 0.4765067994594574,
276
- "rewards/rejected": -1.289598822593689,
277
  "step": 170
278
  },
279
  {
280
- "epoch": 0.38,
281
- "learning_rate": 2.3520971200967337e-06,
282
- "logits/chosen": 0.39020082354545593,
283
- "logits/rejected": 0.4927116334438324,
284
- "logps/chosen": -379.1041259765625,
285
- "logps/rejected": -440.0082092285156,
286
- "loss": 0.0533,
287
- "rewards/accuracies": 0.6187499761581421,
288
- "rewards/chosen": -0.8111687898635864,
289
- "rewards/margins": 0.5016359090805054,
290
- "rewards/rejected": -1.3128045797348022,
291
  "step": 180
292
  },
293
  {
294
- "epoch": 0.4,
295
- "learning_rate": 2.2594927385914546e-06,
296
- "logits/chosen": 0.32924190163612366,
297
- "logits/rejected": 0.46087831258773804,
298
- "logps/chosen": -382.1633605957031,
299
- "logps/rejected": -444.0999450683594,
300
- "loss": 0.0495,
301
- "rewards/accuracies": 0.7124999761581421,
302
- "rewards/chosen": -0.7652384042739868,
303
- "rewards/margins": 0.6649683117866516,
304
- "rewards/rejected": -1.4302066564559937,
305
  "step": 190
306
  },
307
  {
308
- "epoch": 0.42,
309
- "learning_rate": 2.1628172296692954e-06,
310
- "logits/chosen": 0.21413707733154297,
311
- "logits/rejected": 0.302509069442749,
312
- "logps/chosen": -465.3833923339844,
313
- "logps/rejected": -511.8447265625,
314
- "loss": 0.0477,
315
- "rewards/accuracies": 0.7124999761581421,
316
- "rewards/chosen": -1.172499179840088,
317
- "rewards/margins": 0.573866069316864,
318
- "rewards/rejected": -1.7463653087615967,
319
  "step": 200
320
  },
321
  {
322
- "epoch": 0.42,
323
- "eval_logits/chosen": 0.41806796193122864,
324
- "eval_logits/rejected": 0.5197638273239136,
325
- "eval_logps/chosen": -427.650146484375,
326
- "eval_logps/rejected": -515.7966918945312,
327
- "eval_loss": 0.050458863377571106,
328
- "eval_rewards/accuracies": 0.74609375,
329
- "eval_rewards/chosen": -1.038202166557312,
330
- "eval_rewards/margins": 0.8184179663658142,
331
- "eval_rewards/rejected": -1.856619954109192,
332
- "eval_runtime": 75.1858,
333
- "eval_samples_per_second": 26.601,
334
- "eval_steps_per_second": 0.426,
335
- "step": 200
336
- },
337
- {
338
- "epoch": 0.44,
339
- "learning_rate": 2.062588805414343e-06,
340
- "logits/chosen": 0.29592061042785645,
341
- "logits/rejected": 0.39124542474746704,
342
- "logps/chosen": -458.99554443359375,
343
- "logps/rejected": -476.7998046875,
344
- "loss": 0.0543,
345
- "rewards/accuracies": 0.706250011920929,
346
- "rewards/chosen": -1.0953991413116455,
347
- "rewards/margins": 0.6356866955757141,
348
- "rewards/rejected": -1.731086015701294,
349
  "step": 210
350
  },
351
  {
352
- "epoch": 0.46,
353
- "learning_rate": 1.9593447226892386e-06,
354
- "logits/chosen": 0.23310557007789612,
355
- "logits/rejected": 0.4742186963558197,
356
- "logps/chosen": -441.21649169921875,
357
- "logps/rejected": -468.25286865234375,
358
- "loss": 0.0599,
359
- "rewards/accuracies": 0.71875,
360
- "rewards/chosen": -0.9769255518913269,
361
- "rewards/margins": 0.7468104362487793,
362
- "rewards/rejected": -1.723736047744751,
363
  "step": 220
364
  },
365
  {
366
- "epoch": 0.48,
367
- "learning_rate": 1.853638403264141e-06,
368
- "logits/chosen": 0.4100280702114105,
369
- "logits/rejected": 0.5993035435676575,
370
- "logps/chosen": -494.64324951171875,
371
- "logps/rejected": -490.0165100097656,
372
- "loss": 0.0578,
373
- "rewards/accuracies": 0.7250000238418579,
374
- "rewards/chosen": -1.2230786085128784,
375
- "rewards/margins": 0.6530172824859619,
376
- "rewards/rejected": -1.8760957717895508,
377
  "step": 230
378
  },
379
  {
380
- "epoch": 0.5,
381
- "learning_rate": 1.7460364672965328e-06,
382
- "logits/chosen": 0.6504024267196655,
383
- "logits/rejected": 0.7802666425704956,
384
- "logps/chosen": -466.16973876953125,
385
- "logps/rejected": -511.08502197265625,
386
- "loss": 0.0549,
387
- "rewards/accuracies": 0.643750011920929,
388
- "rewards/chosen": -1.191239595413208,
389
- "rewards/margins": 0.7851654291152954,
390
- "rewards/rejected": -1.976405143737793,
391
  "step": 240
392
  },
393
  {
394
- "epoch": 0.52,
395
- "learning_rate": 1.637115696063402e-06,
396
- "logits/chosen": 0.7357971668243408,
397
- "logits/rejected": 0.8341084718704224,
398
- "logps/chosen": -462.93048095703125,
399
- "logps/rejected": -550.9013671875,
400
- "loss": 0.0342,
401
- "rewards/accuracies": 0.675000011920929,
402
- "rewards/chosen": -1.577097773551941,
403
- "rewards/margins": 0.7956889271736145,
404
- "rewards/rejected": -2.3727867603302,
405
  "step": 250
406
  },
407
  {
408
- "epoch": 0.54,
409
- "learning_rate": 1.5274599402265162e-06,
410
- "logits/chosen": 0.7676488757133484,
411
- "logits/rejected": 0.9279497861862183,
412
- "logps/chosen": -490.0227966308594,
413
- "logps/rejected": -543.2033081054688,
414
- "loss": 0.0336,
415
- "rewards/accuracies": 0.6625000238418579,
416
- "rewards/chosen": -1.5475876331329346,
417
- "rewards/margins": 0.6384353041648865,
418
- "rewards/rejected": -2.186022996902466,
419
  "step": 260
420
  },
421
  {
422
- "epoch": 0.57,
423
- "learning_rate": 1.4176569902035088e-06,
424
- "logits/chosen": 0.7670334577560425,
425
- "logits/rejected": 0.927658200263977,
426
- "logps/chosen": -455.6305236816406,
427
- "logps/rejected": -507.54913330078125,
428
- "loss": 0.0334,
429
- "rewards/accuracies": 0.675000011920929,
430
- "rewards/chosen": -1.38298761844635,
431
- "rewards/margins": 0.6534308195114136,
432
- "rewards/rejected": -2.0364184379577637,
433
  "step": 270
434
  },
435
  {
436
- "epoch": 0.59,
437
- "learning_rate": 1.308295425420593e-06,
438
- "logits/chosen": 0.7235329151153564,
439
- "logits/rejected": 0.8158149719238281,
440
- "logps/chosen": -491.1328125,
441
- "logps/rejected": -560.6801147460938,
442
- "loss": 0.0301,
443
- "rewards/accuracies": 0.699999988079071,
444
- "rewards/chosen": -1.4047319889068604,
445
- "rewards/margins": 0.7390089631080627,
446
- "rewards/rejected": -2.1437408924102783,
447
  "step": 280
448
  },
449
  {
450
- "epoch": 0.61,
451
- "learning_rate": 1.1999614593359337e-06,
452
- "logits/chosen": 0.7884746789932251,
453
- "logits/rejected": 1.0120609998703003,
454
- "logps/chosen": -492.41693115234375,
455
- "logps/rejected": -518.9060668945312,
456
- "loss": 0.03,
457
- "rewards/accuracies": 0.6625000238418579,
458
- "rewards/chosen": -1.4595239162445068,
459
- "rewards/margins": 0.7071082592010498,
460
- "rewards/rejected": -2.1666321754455566,
461
  "step": 290
462
  },
463
  {
464
- "epoch": 0.63,
465
- "learning_rate": 1.0932357971453745e-06,
466
- "logits/chosen": 0.8025213479995728,
467
- "logits/rejected": 0.9630680084228516,
468
- "logps/chosen": -472.7798767089844,
469
- "logps/rejected": -523.0516967773438,
470
- "loss": 0.0313,
471
- "rewards/accuracies": 0.6875,
472
- "rewards/chosen": -1.4041074514389038,
473
- "rewards/margins": 0.6285351514816284,
474
- "rewards/rejected": -2.0326426029205322,
475
- "step": 300
476
- },
477
- {
478
- "epoch": 0.63,
479
- "eval_logits/chosen": 0.9400739669799805,
480
- "eval_logits/rejected": 1.0433921813964844,
481
- "eval_logps/chosen": -454.1192932128906,
482
- "eval_logps/rejected": -552.3697509765625,
483
- "eval_loss": 0.03436482325196266,
484
- "eval_rewards/accuracies": 0.72265625,
485
- "eval_rewards/chosen": -1.3028936386108398,
486
- "eval_rewards/margins": 0.9194571375846863,
487
- "eval_rewards/rejected": -2.222350835800171,
488
- "eval_runtime": 75.6069,
489
- "eval_samples_per_second": 26.453,
490
- "eval_steps_per_second": 0.423,
491
  "step": 300
492
  },
493
  {
494
- "epoch": 0.65,
495
- "learning_rate": 9.886905230142433e-07,
496
- "logits/chosen": 0.7544746398925781,
497
- "logits/rejected": 0.9142723083496094,
498
- "logps/chosen": -462.0435485839844,
499
- "logps/rejected": -525.331298828125,
500
- "loss": 0.0346,
501
- "rewards/accuracies": 0.6625000238418579,
502
- "rewards/chosen": -1.3456170558929443,
503
- "rewards/margins": 0.749636709690094,
504
- "rewards/rejected": -2.0952537059783936,
505
  "step": 310
506
  },
507
  {
508
- "epoch": 0.67,
509
- "learning_rate": 8.868860335206678e-07,
510
- "logits/chosen": 0.9283370971679688,
511
- "logits/rejected": 1.136993169784546,
512
- "logps/chosen": -478.44976806640625,
513
- "logps/rejected": -530.1534423828125,
514
- "loss": 0.0338,
515
- "rewards/accuracies": 0.7124999761581421,
516
- "rewards/chosen": -1.244257926940918,
517
- "rewards/margins": 0.6402724385261536,
518
- "rewards/rejected": -1.8845303058624268,
519
  "step": 320
520
  },
521
  {
522
- "epoch": 0.69,
523
- "learning_rate": 7.883680337481599e-07,
524
- "logits/chosen": 0.7307278513908386,
525
- "logits/rejected": 0.8725861310958862,
526
- "logps/chosen": -448.43280029296875,
527
- "logps/rejected": -533.6476440429688,
528
- "loss": 0.0375,
529
- "rewards/accuracies": 0.6812499761581421,
530
- "rewards/chosen": -1.2363145351409912,
531
- "rewards/margins": 0.7372487187385559,
532
- "rewards/rejected": -1.9735629558563232,
533
  "step": 330
534
  },
535
- {
536
- "epoch": 0.71,
537
- "learning_rate": 6.936646121293654e-07,
538
- "logits/chosen": 0.5649510622024536,
539
- "logits/rejected": 0.7639907598495483,
540
- "logps/chosen": -466.2808532714844,
541
- "logps/rejected": -526.1297607421875,
542
- "loss": 0.0435,
543
- "rewards/accuracies": 0.706250011920929,
544
- "rewards/chosen": -1.0562084913253784,
545
- "rewards/margins": 0.7370297312736511,
546
- "rewards/rejected": -1.7932384014129639,
547
- "step": 340
548
- },
549
- {
550
- "epoch": 0.73,
551
- "learning_rate": 6.032834097207889e-07,
552
- "logits/chosen": 0.7209309935569763,
553
- "logits/rejected": 0.7828409671783447,
554
- "logps/chosen": -401.2094421386719,
555
- "logps/rejected": -480.31671142578125,
556
- "loss": 0.0403,
557
- "rewards/accuracies": 0.699999988079071,
558
- "rewards/chosen": -1.098332405090332,
559
- "rewards/margins": 0.6962517499923706,
560
- "rewards/rejected": -1.7945845127105713,
561
- "step": 350
562
- },
563
- {
564
- "epoch": 0.75,
565
- "learning_rate": 5.177088990820725e-07,
566
- "logits/chosen": 0.6787894368171692,
567
- "logits/rejected": 0.8372275233268738,
568
- "logps/chosen": -446.56317138671875,
569
- "logps/rejected": -465.1809997558594,
570
- "loss": 0.0453,
571
- "rewards/accuracies": 0.6625000238418579,
572
- "rewards/chosen": -1.136115550994873,
573
- "rewards/margins": 0.6250497698783875,
574
- "rewards/rejected": -1.7611652612686157,
575
- "step": 360
576
- },
577
- {
578
- "epoch": 0.77,
579
- "learning_rate": 4.3739978734594494e-07,
580
- "logits/chosen": 0.6346519589424133,
581
- "logits/rejected": 0.867949366569519,
582
- "logps/chosen": -439.4676208496094,
583
- "logps/rejected": -468.6329040527344,
584
- "loss": 0.0364,
585
- "rewards/accuracies": 0.75,
586
- "rewards/chosen": -1.016570806503296,
587
- "rewards/margins": 0.8048780560493469,
588
- "rewards/rejected": -1.8214489221572876,
589
- "step": 370
590
- },
591
- {
592
- "epoch": 0.8,
593
- "learning_rate": 3.627865573992087e-07,
594
- "logits/chosen": 0.6531890630722046,
595
- "logits/rejected": 0.6925245523452759,
596
- "logps/chosen": -437.359375,
597
- "logps/rejected": -492.814453125,
598
- "loss": 0.0425,
599
- "rewards/accuracies": 0.6812499761581421,
600
- "rewards/chosen": -1.09610116481781,
601
- "rewards/margins": 0.6472191214561462,
602
- "rewards/rejected": -1.7433204650878906,
603
- "step": 380
604
- },
605
- {
606
- "epoch": 0.82,
607
- "learning_rate": 2.9426916035484166e-07,
608
- "logits/chosen": 0.4887206554412842,
609
- "logits/rejected": 0.7168077230453491,
610
- "logps/chosen": -490.0777893066406,
611
- "logps/rejected": -530.9293212890625,
612
- "loss": 0.038,
613
- "rewards/accuracies": 0.7875000238418579,
614
- "rewards/chosen": -1.2254283428192139,
615
- "rewards/margins": 0.8675802946090698,
616
- "rewards/rejected": -2.0930087566375732,
617
- "step": 390
618
- },
619
- {
620
- "epoch": 0.84,
621
- "learning_rate": 2.322148716843081e-07,
622
- "logits/chosen": 0.6055541038513184,
623
- "logits/rejected": 0.687682032585144,
624
- "logps/chosen": -429.68603515625,
625
- "logps/rejected": -461.8595275878906,
626
- "loss": 0.0359,
627
- "rewards/accuracies": 0.675000011920929,
628
- "rewards/chosen": -1.1895955801010132,
629
- "rewards/margins": 0.5694113373756409,
630
- "rewards/rejected": -1.7590070962905884,
631
- "step": 400
632
- },
633
- {
634
- "epoch": 0.84,
635
- "eval_logits/chosen": 0.7290832996368408,
636
- "eval_logits/rejected": 0.8196390867233276,
637
- "eval_logps/chosen": -435.5875244140625,
638
- "eval_logps/rejected": -531.2747192382812,
639
- "eval_loss": 0.04154704138636589,
640
- "eval_rewards/accuracies": 0.70703125,
641
- "eval_rewards/chosen": -1.1175758838653564,
642
- "eval_rewards/margins": 0.8938245177268982,
643
- "eval_rewards/rejected": -2.0114002227783203,
644
- "eval_runtime": 75.1852,
645
- "eval_samples_per_second": 26.601,
646
- "eval_steps_per_second": 0.426,
647
- "step": 400
648
- },
649
- {
650
- "epoch": 0.86,
651
- "learning_rate": 1.7695632250191002e-07,
652
- "logits/chosen": 0.5428584814071655,
653
- "logits/rejected": 0.6822582483291626,
654
- "logps/chosen": -435.78680419921875,
655
- "logps/rejected": -452.6622009277344,
656
- "loss": 0.0367,
657
- "rewards/accuracies": 0.6875,
658
- "rewards/chosen": -1.1751288175582886,
659
- "rewards/margins": 0.5176131129264832,
660
- "rewards/rejected": -1.6927419900894165,
661
- "step": 410
662
- },
663
- {
664
- "epoch": 0.88,
665
- "learning_rate": 1.2878971655412515e-07,
666
- "logits/chosen": 0.5744162797927856,
667
- "logits/rejected": 0.6994149088859558,
668
- "logps/chosen": -474.30908203125,
669
- "logps/rejected": -495.92852783203125,
670
- "loss": 0.0394,
671
- "rewards/accuracies": 0.637499988079071,
672
- "rewards/chosen": -1.3165512084960938,
673
- "rewards/margins": 0.6040414571762085,
674
- "rewards/rejected": -1.9205926656723022,
675
- "step": 420
676
- },
677
- {
678
- "epoch": 0.9,
679
- "learning_rate": 8.797324247145411e-08,
680
- "logits/chosen": 0.6493648290634155,
681
- "logits/rejected": 0.6758213043212891,
682
- "logps/chosen": -426.60223388671875,
683
- "logps/rejected": -521.1129150390625,
684
- "loss": 0.0365,
685
- "rewards/accuracies": 0.75,
686
- "rewards/chosen": -1.1816965341567993,
687
- "rewards/margins": 0.7749902009963989,
688
- "rewards/rejected": -1.9566866159439087,
689
- "step": 430
690
- },
691
- {
692
- "epoch": 0.92,
693
- "learning_rate": 5.472568979361853e-08,
694
- "logits/chosen": 0.7012882232666016,
695
- "logits/rejected": 0.7845873832702637,
696
- "logps/chosen": -459.6414489746094,
697
- "logps/rejected": -518.3292846679688,
698
- "loss": 0.0412,
699
- "rewards/accuracies": 0.643750011920929,
700
- "rewards/chosen": -1.2379354238510132,
701
- "rewards/margins": 0.7565950155258179,
702
- "rewards/rejected": -1.994530439376831,
703
- "step": 440
704
- },
705
- {
706
- "epoch": 0.94,
707
- "learning_rate": 2.922527618666465e-08,
708
- "logits/chosen": 0.6378465294837952,
709
- "logits/rejected": 0.8079195022583008,
710
- "logps/chosen": -484.46197509765625,
711
- "logps/rejected": -520.6287841796875,
712
- "loss": 0.0404,
713
- "rewards/accuracies": 0.6937500238418579,
714
- "rewards/chosen": -1.2168313264846802,
715
- "rewards/margins": 0.676922082901001,
716
- "rewards/rejected": -1.8937534093856812,
717
- "step": 450
718
- },
719
  {
720
  "epoch": 0.96,
721
- "learning_rate": 1.1608692138469379e-08,
722
- "logits/chosen": 0.7224764227867126,
723
- "logits/rejected": 0.8670576214790344,
724
- "logps/chosen": -398.640869140625,
725
- "logps/rejected": -444.4422912597656,
726
- "loss": 0.039,
727
- "rewards/accuracies": 0.675000011920929,
728
- "rewards/chosen": -1.110528826713562,
729
- "rewards/margins": 0.6203423738479614,
730
- "rewards/rejected": -1.7308712005615234,
731
- "step": 460
732
  },
733
  {
734
  "epoch": 0.98,
735
- "learning_rate": 1.970368253390198e-09,
736
- "logits/chosen": 0.6133291125297546,
737
- "logits/rejected": 0.744029700756073,
738
- "logps/chosen": -413.31732177734375,
739
- "logps/rejected": -497.829345703125,
740
- "loss": 0.0407,
741
- "rewards/accuracies": 0.699999988079071,
742
- "rewards/chosen": -1.0763300657272339,
743
- "rewards/margins": 0.8329319953918457,
744
- "rewards/rejected": -1.9092620611190796,
745
- "step": 470
746
  },
747
  {
748
  "epoch": 1.0,
749
- "step": 477,
750
  "total_flos": 0.0,
751
- "train_loss": 0.055112330793584664,
752
- "train_runtime": 4571.3444,
753
- "train_samples_per_second": 13.373,
754
- "train_steps_per_second": 0.104
755
  }
756
  ],
757
  "logging_steps": 10,
758
- "max_steps": 477,
759
  "num_train_epochs": 1,
760
- "save_steps": 1000,
761
  "total_flos": 0.0,
762
  "trial_name": null,
763
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9975412715138743,
5
+ "eval_steps": 10000,
6
+ "global_step": 355,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03,
13
+ "learning_rate": 8.333333333333334e-07,
14
+ "logits/chosen": -0.08247309923171997,
15
+ "logits/rejected": -0.0386468842625618,
16
+ "logps/chosen": -327.3994140625,
17
+ "logps/rejected": -244.6085968017578,
18
+ "loss": 0.5077,
19
+ "rewards/accuracies": 0.3812499940395355,
20
+ "rewards/chosen": -0.002138084964826703,
21
+ "rewards/margins": -0.0002509051118977368,
22
+ "rewards/rejected": -0.0018871795618906617,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "step": 10
24
  },
25
  {
26
+ "epoch": 0.06,
27
+ "learning_rate": 1.6666666666666669e-06,
28
+ "logits/chosen": -0.045555226504802704,
29
+ "logits/rejected": -0.013953140005469322,
30
+ "logps/chosen": -293.5519104003906,
31
+ "logps/rejected": -201.40576171875,
32
+ "loss": 0.5253,
33
+ "rewards/accuracies": 0.625,
34
+ "rewards/chosen": -0.021771755069494247,
35
+ "rewards/margins": 0.0479663722217083,
36
+ "rewards/rejected": -0.06973812729120255,
37
  "step": 20
38
  },
39
  {
40
+ "epoch": 0.08,
41
+ "learning_rate": 2.5e-06,
42
+ "logits/chosen": -0.0032004565000534058,
43
+ "logits/rejected": 0.0453818179666996,
44
+ "logps/chosen": -361.3478088378906,
45
+ "logps/rejected": -237.36703491210938,
46
+ "loss": 0.5488,
47
+ "rewards/accuracies": 0.637499988079071,
48
+ "rewards/chosen": -0.12262026220560074,
49
+ "rewards/margins": 0.2048310935497284,
50
+ "rewards/rejected": -0.32745134830474854,
51
  "step": 30
52
  },
53
  {
54
+ "epoch": 0.11,
55
+ "learning_rate": 2.9988362934929793e-06,
56
+ "logits/chosen": -0.09940309822559357,
57
+ "logits/rejected": -0.05378924682736397,
58
+ "logps/chosen": -314.38812255859375,
59
+ "logps/rejected": -247.3548583984375,
60
+ "loss": 0.5321,
61
+ "rewards/accuracies": 0.6000000238418579,
62
+ "rewards/chosen": -0.10980594158172607,
63
+ "rewards/margins": 0.13736829161643982,
64
+ "rewards/rejected": -0.24717426300048828,
65
  "step": 40
66
  },
67
  {
68
+ "epoch": 0.14,
69
+ "learning_rate": 2.985765322825759e-06,
70
+ "logits/chosen": -0.1258987933397293,
71
+ "logits/rejected": -0.09743531048297882,
72
+ "logps/chosen": -320.26220703125,
73
+ "logps/rejected": -259.95989990234375,
74
+ "loss": 0.484,
75
+ "rewards/accuracies": 0.543749988079071,
76
+ "rewards/chosen": -0.047640036791563034,
77
+ "rewards/margins": 0.12707160413265228,
78
+ "rewards/rejected": -0.17471164464950562,
79
  "step": 50
80
  },
81
  {
82
+ "epoch": 0.17,
83
+ "learning_rate": 2.9582958419982717e-06,
84
+ "logits/chosen": -0.20755784213542938,
85
+ "logits/rejected": -0.14810998737812042,
86
+ "logps/chosen": -397.5509948730469,
87
+ "logps/rejected": -250.44027709960938,
88
+ "loss": 0.463,
89
+ "rewards/accuracies": 0.699999988079071,
90
+ "rewards/chosen": -0.10637468099594116,
91
+ "rewards/margins": 0.37280240654945374,
92
+ "rewards/rejected": -0.4791770875453949,
93
  "step": 60
94
  },
95
  {
96
+ "epoch": 0.2,
97
+ "learning_rate": 2.916694056980408e-06,
98
+ "logits/chosen": -0.19774258136749268,
99
+ "logits/rejected": -0.12501199543476105,
100
+ "logps/chosen": -388.07427978515625,
101
+ "logps/rejected": -266.5827331542969,
102
+ "loss": 0.4471,
103
+ "rewards/accuracies": 0.625,
104
+ "rewards/chosen": -0.19550617039203644,
105
+ "rewards/margins": 0.24603180587291718,
106
+ "rewards/rejected": -0.4415379464626312,
107
  "step": 70
108
  },
109
  {
110
+ "epoch": 0.22,
111
+ "learning_rate": 2.8613631295064357e-06,
112
+ "logits/chosen": -0.168908953666687,
113
+ "logits/rejected": -0.16765542328357697,
114
+ "logps/chosen": -288.5673828125,
115
+ "logps/rejected": -233.2335968017578,
116
+ "loss": 0.4418,
117
+ "rewards/accuracies": 0.643750011920929,
118
+ "rewards/chosen": -0.2916755676269531,
119
+ "rewards/margins": 0.2403053343296051,
120
+ "rewards/rejected": -0.5319808721542358,
121
  "step": 80
122
  },
123
  {
124
+ "epoch": 0.25,
125
+ "learning_rate": 2.792839270045916e-06,
126
+ "logits/chosen": -0.3168974816799164,
127
+ "logits/rejected": -0.27101725339889526,
128
+ "logps/chosen": -395.9394226074219,
129
+ "logps/rejected": -256.7802734375,
130
+ "loss": 0.4071,
131
+ "rewards/accuracies": 0.6875,
132
+ "rewards/chosen": -0.26048293709754944,
133
+ "rewards/margins": 0.4022350311279297,
134
+ "rewards/rejected": -0.6627179980278015,
135
  "step": 90
136
  },
137
  {
138
+ "epoch": 0.28,
139
+ "learning_rate": 2.711786541403051e-06,
140
+ "logits/chosen": -0.33102917671203613,
141
+ "logits/rejected": -0.338579922914505,
142
+ "logps/chosen": -387.654296875,
143
+ "logps/rejected": -305.7378845214844,
144
+ "loss": 0.379,
145
+ "rewards/accuracies": 0.625,
146
+ "rewards/chosen": -0.4444798529148102,
147
+ "rewards/margins": 0.19619156420230865,
148
+ "rewards/rejected": -0.6406713724136353,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "step": 100
150
  },
151
  {
152
+ "epoch": 0.31,
153
+ "learning_rate": 2.6189904233026363e-06,
154
+ "logits/chosen": -0.4311809539794922,
155
+ "logits/rejected": -0.3962547183036804,
156
+ "logps/chosen": -351.5871887207031,
157
+ "logps/rejected": -305.3436584472656,
158
+ "loss": 0.3607,
159
+ "rewards/accuracies": 0.543749988079071,
160
+ "rewards/chosen": -0.5928763151168823,
161
+ "rewards/margins": 0.10657763481140137,
162
+ "rewards/rejected": -0.6994539499282837,
163
  "step": 110
164
  },
165
  {
166
+ "epoch": 0.34,
167
+ "learning_rate": 2.515350200328027e-06,
168
+ "logits/chosen": -0.4205436706542969,
169
+ "logits/rejected": -0.40109872817993164,
170
+ "logps/chosen": -366.4017028808594,
171
+ "logps/rejected": -258.0574951171875,
172
+ "loss": 0.3644,
173
+ "rewards/accuracies": 0.675000011920929,
174
+ "rewards/chosen": -0.48554643988609314,
175
+ "rewards/margins": 0.24580618739128113,
176
+ "rewards/rejected": -0.7313526272773743,
177
  "step": 120
178
  },
179
  {
180
+ "epoch": 0.37,
181
+ "learning_rate": 2.401870246979413e-06,
182
+ "logits/chosen": -0.36771100759506226,
183
+ "logits/rejected": -0.31752774119377136,
184
+ "logps/chosen": -408.0353088378906,
185
+ "logps/rejected": -279.37872314453125,
186
+ "loss": 0.3791,
187
+ "rewards/accuracies": 0.699999988079071,
188
+ "rewards/chosen": -0.5797132849693298,
189
+ "rewards/margins": 0.35475510358810425,
190
+ "rewards/rejected": -0.9344683885574341,
191
  "step": 130
192
  },
193
  {
194
+ "epoch": 0.39,
195
+ "learning_rate": 2.279650294308645e-06,
196
+ "logits/chosen": -0.3124179244041443,
197
+ "logits/rejected": -0.3357269763946533,
198
+ "logps/chosen": -386.987548828125,
199
+ "logps/rejected": -315.2652587890625,
200
+ "loss": 0.3217,
201
+ "rewards/accuracies": 0.59375,
202
+ "rewards/chosen": -0.880761981010437,
203
+ "rewards/margins": 0.18241076171398163,
204
+ "rewards/rejected": -1.063172698020935,
205
  "step": 140
206
  },
207
  {
208
+ "epoch": 0.42,
209
+ "learning_rate": 2.1498747724563957e-06,
210
+ "logits/chosen": -0.237782284617424,
211
+ "logits/rejected": -0.2272220402956009,
212
+ "logps/chosen": -347.40618896484375,
213
+ "logps/rejected": -309.7053527832031,
214
+ "loss": 0.32,
215
+ "rewards/accuracies": 0.6187499761581421,
216
+ "rewards/chosen": -0.775266706943512,
217
+ "rewards/margins": 0.20919008553028107,
218
+ "rewards/rejected": -0.9844567179679871,
219
  "step": 150
220
  },
221
  {
222
+ "epoch": 0.45,
223
+ "learning_rate": 2.0138013323728074e-06,
224
+ "logits/chosen": -0.3776048719882965,
225
+ "logits/rejected": -0.3391488790512085,
226
+ "logps/chosen": -430.7017517089844,
227
+ "logps/rejected": -327.8933410644531,
228
+ "loss": 0.3115,
229
+ "rewards/accuracies": 0.668749988079071,
230
+ "rewards/chosen": -0.8183802366256714,
231
+ "rewards/margins": 0.30396735668182373,
232
+ "rewards/rejected": -1.1223475933074951,
233
  "step": 160
234
  },
235
  {
236
+ "epoch": 0.48,
237
+ "learning_rate": 1.8727486579573409e-06,
238
+ "logits/chosen": -0.3819672465324402,
239
+ "logits/rejected": -0.38335323333740234,
240
+ "logps/chosen": -376.9703369140625,
241
+ "logps/rejected": -336.7782287597656,
242
+ "loss": 0.3038,
243
+ "rewards/accuracies": 0.59375,
244
+ "rewards/chosen": -0.815502941608429,
245
+ "rewards/margins": 0.2603687345981598,
246
+ "rewards/rejected": -1.0758715867996216,
247
  "step": 170
248
  },
249
  {
250
+ "epoch": 0.51,
251
+ "learning_rate": 1.7280836867300083e-06,
252
+ "logits/chosen": -0.5107148885726929,
253
+ "logits/rejected": -0.5026928186416626,
254
+ "logps/chosen": -388.6782531738281,
255
+ "logps/rejected": -321.05328369140625,
256
+ "loss": 0.3147,
257
+ "rewards/accuracies": 0.59375,
258
+ "rewards/chosen": -0.8670403361320496,
259
+ "rewards/margins": 0.20068030059337616,
260
+ "rewards/rejected": -1.067720651626587,
261
  "step": 180
262
  },
263
  {
264
+ "epoch": 0.53,
265
+ "learning_rate": 1.5812083628781265e-06,
266
+ "logits/chosen": -0.5208634734153748,
267
+ "logits/rejected": -0.506948709487915,
268
+ "logps/chosen": -409.93951416015625,
269
+ "logps/rejected": -310.84381103515625,
270
+ "loss": 0.2825,
271
+ "rewards/accuracies": 0.643750011920929,
272
+ "rewards/chosen": -0.8959806561470032,
273
+ "rewards/margins": 0.3938984274864197,
274
+ "rewards/rejected": -1.2898790836334229,
275
  "step": 190
276
  },
277
  {
278
+ "epoch": 0.56,
279
+ "learning_rate": 1.433546051054432e-06,
280
+ "logits/chosen": -0.5745254158973694,
281
+ "logits/rejected": -0.5329603552818298,
282
+ "logps/chosen": -414.594970703125,
283
+ "logps/rejected": -349.5610656738281,
284
+ "loss": 0.2849,
285
+ "rewards/accuracies": 0.53125,
286
+ "rewards/chosen": -1.2510201930999756,
287
+ "rewards/margins": 0.1971484124660492,
288
+ "rewards/rejected": -1.4481686353683472,
289
  "step": 200
290
  },
291
  {
292
+ "epoch": 0.59,
293
+ "learning_rate": 1.2865277425900725e-06,
294
+ "logits/chosen": -0.4251164495944977,
295
+ "logits/rejected": -0.4111138880252838,
296
+ "logps/chosen": -446.27685546875,
297
+ "logps/rejected": -339.98321533203125,
298
+ "loss": 0.2914,
299
+ "rewards/accuracies": 0.625,
300
+ "rewards/chosen": -0.8489357829093933,
301
+ "rewards/margins": 0.3122832179069519,
302
+ "rewards/rejected": -1.1612190008163452,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  "step": 210
304
  },
305
  {
306
+ "epoch": 0.62,
307
+ "learning_rate": 1.141578187797663e-06,
308
+ "logits/chosen": -0.5450788736343384,
309
+ "logits/rejected": -0.5059890151023865,
310
+ "logps/chosen": -417.80908203125,
311
+ "logps/rejected": -308.05596923828125,
312
+ "loss": 0.2944,
313
+ "rewards/accuracies": 0.6187499761581421,
314
+ "rewards/chosen": -0.8429006338119507,
315
+ "rewards/margins": 0.24668729305267334,
316
+ "rewards/rejected": -1.089587926864624,
317
  "step": 220
318
  },
319
  {
320
+ "epoch": 0.65,
321
+ "learning_rate": 1.0001020887558839e-06,
322
+ "logits/chosen": -0.47975045442581177,
323
+ "logits/rejected": -0.4807058274745941,
324
+ "logps/chosen": -387.7288818359375,
325
+ "logps/rejected": -303.9996643066406,
326
+ "loss": 0.292,
327
+ "rewards/accuracies": 0.6000000238418579,
328
+ "rewards/chosen": -0.8383504748344421,
329
+ "rewards/margins": 0.226647287607193,
330
+ "rewards/rejected": -1.064997911453247,
331
  "step": 230
332
  },
333
  {
334
+ "epoch": 0.67,
335
+ "learning_rate": 8.634704863809502e-07,
336
+ "logits/chosen": -0.5008819103240967,
337
+ "logits/rejected": -0.46626418828964233,
338
+ "logps/chosen": -436.1036071777344,
339
+ "logps/rejected": -323.905029296875,
340
+ "loss": 0.2845,
341
+ "rewards/accuracies": 0.6187499761581421,
342
+ "rewards/chosen": -0.8871256709098816,
343
+ "rewards/margins": 0.21674367785453796,
344
+ "rewards/rejected": -1.1038693189620972,
345
  "step": 240
346
  },
347
  {
348
+ "epoch": 0.7,
349
+ "learning_rate": 7.330074737074666e-07,
350
+ "logits/chosen": -0.5267480611801147,
351
+ "logits/rejected": -0.528629720211029,
352
+ "logps/chosen": -440.72528076171875,
353
+ "logps/rejected": -327.6587219238281,
354
+ "loss": 0.2911,
355
+ "rewards/accuracies": 0.6312500238418579,
356
+ "rewards/chosen": -0.8738571405410767,
357
+ "rewards/margins": 0.38728970289230347,
358
+ "rewards/rejected": -1.2611467838287354,
359
  "step": 250
360
  },
361
  {
362
+ "epoch": 0.73,
363
+ "learning_rate": 6.099773641398835e-07,
364
+ "logits/chosen": -0.5498321056365967,
365
+ "logits/rejected": -0.5135624408721924,
366
+ "logps/chosen": -430.474609375,
367
+ "logps/rejected": -340.8508605957031,
368
+ "loss": 0.3049,
369
+ "rewards/accuracies": 0.65625,
370
+ "rewards/chosen": -0.9756007194519043,
371
+ "rewards/margins": 0.3439735174179077,
372
+ "rewards/rejected": -1.319574236869812,
373
  "step": 260
374
  },
375
  {
376
+ "epoch": 0.76,
377
+ "learning_rate": 4.955724390266841e-07,
378
+ "logits/chosen": -0.5001212358474731,
379
+ "logits/rejected": -0.509675920009613,
380
+ "logps/chosen": -397.41259765625,
381
+ "logps/rejected": -336.1043395996094,
382
+ "loss": 0.2934,
383
+ "rewards/accuracies": 0.637499988079071,
384
+ "rewards/chosen": -0.9926079511642456,
385
+ "rewards/margins": 0.33142027258872986,
386
+ "rewards/rejected": -1.3240282535552979,
387
  "step": 270
388
  },
389
  {
390
+ "epoch": 0.79,
391
+ "learning_rate": 3.9090139329520333e-07,
392
+ "logits/chosen": -0.4506424367427826,
393
+ "logits/rejected": -0.4490571916103363,
394
+ "logps/chosen": -455.0115661621094,
395
+ "logps/rejected": -344.4391784667969,
396
+ "loss": 0.3013,
397
+ "rewards/accuracies": 0.637499988079071,
398
+ "rewards/chosen": -1.018028974533081,
399
+ "rewards/margins": 0.3979285955429077,
400
+ "rewards/rejected": -1.4159575700759888,
401
  "step": 280
402
  },
403
  {
404
+ "epoch": 0.81,
405
+ "learning_rate": 2.9697859112011724e-07,
406
+ "logits/chosen": -0.47781458497047424,
407
+ "logits/rejected": -0.4574874937534332,
408
+ "logps/chosen": -417.3575744628906,
409
+ "logps/rejected": -349.57037353515625,
410
+ "loss": 0.3021,
411
+ "rewards/accuracies": 0.5625,
412
+ "rewards/chosen": -1.0533437728881836,
413
+ "rewards/margins": 0.22412605583667755,
414
+ "rewards/rejected": -1.2774698734283447,
415
  "step": 290
416
  },
417
  {
418
+ "epoch": 0.84,
419
+ "learning_rate": 2.1471423574861643e-07,
420
+ "logits/chosen": -0.49651557207107544,
421
+ "logits/rejected": -0.46776896715164185,
422
+ "logps/chosen": -429.81732177734375,
423
+ "logps/rejected": -326.6114807128906,
424
+ "loss": 0.2961,
425
+ "rewards/accuracies": 0.6312500238418579,
426
+ "rewards/chosen": -0.8363375663757324,
427
+ "rewards/margins": 0.38524097204208374,
428
+ "rewards/rejected": -1.2215787172317505,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  "step": 300
430
  },
431
  {
432
+ "epoch": 0.87,
433
+ "learning_rate": 1.449055487462102e-07,
434
+ "logits/chosen": -0.47061723470687866,
435
+ "logits/rejected": -0.4526177942752838,
436
+ "logps/chosen": -401.2471618652344,
437
+ "logps/rejected": -344.2327575683594,
438
+ "loss": 0.2998,
439
+ "rewards/accuracies": 0.6499999761581421,
440
+ "rewards/chosen": -0.8958613276481628,
441
+ "rewards/margins": 0.30968743562698364,
442
+ "rewards/rejected": -1.205548644065857,
443
  "step": 310
444
  },
445
  {
446
+ "epoch": 0.9,
447
+ "learning_rate": 8.822904414485194e-08,
448
+ "logits/chosen": -0.4069460928440094,
449
+ "logits/rejected": -0.413198858499527,
450
+ "logps/chosen": -376.17364501953125,
451
+ "logps/rejected": -327.5869140625,
452
+ "loss": 0.2994,
453
+ "rewards/accuracies": 0.59375,
454
+ "rewards/chosen": -0.8381717801094055,
455
+ "rewards/margins": 0.24178913235664368,
456
+ "rewards/rejected": -1.079960823059082,
457
  "step": 320
458
  },
459
  {
460
+ "epoch": 0.93,
461
+ "learning_rate": 4.523397236438398e-08,
462
+ "logits/chosen": -0.5108148455619812,
463
+ "logits/rejected": -0.49169641733169556,
464
+ "logps/chosen": -478.1392517089844,
465
+ "logps/rejected": -382.9190979003906,
466
+ "loss": 0.2897,
467
+ "rewards/accuracies": 0.668749988079071,
468
+ "rewards/chosen": -0.9290350079536438,
469
+ "rewards/margins": 0.41925033926963806,
470
+ "rewards/rejected": -1.3482853174209595,
471
  "step": 330
472
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  {
474
  "epoch": 0.96,
475
+ "learning_rate": 1.6336997442095825e-08,
476
+ "logits/chosen": -0.4700957238674164,
477
+ "logits/rejected": -0.43701282143592834,
478
+ "logps/chosen": -426.40631103515625,
479
+ "logps/rejected": -343.24334716796875,
480
+ "loss": 0.3046,
481
+ "rewards/accuracies": 0.6187499761581421,
482
+ "rewards/chosen": -0.9494872093200684,
483
+ "rewards/margins": 0.334994375705719,
484
+ "rewards/rejected": -1.2844815254211426,
485
+ "step": 340
486
  },
487
  {
488
  "epoch": 0.98,
489
+ "learning_rate": 1.8181591531977737e-09,
490
+ "logits/chosen": -0.5112472176551819,
491
+ "logits/rejected": -0.5403722524642944,
492
+ "logps/chosen": -407.7789611816406,
493
+ "logps/rejected": -344.8587951660156,
494
+ "loss": 0.3017,
495
+ "rewards/accuracies": 0.675000011920929,
496
+ "rewards/chosen": -0.9037674069404602,
497
+ "rewards/margins": 0.4302978515625,
498
+ "rewards/rejected": -1.334065318107605,
499
+ "step": 350
500
  },
501
  {
502
  "epoch": 1.0,
503
+ "step": 355,
504
  "total_flos": 0.0,
505
+ "train_loss": 0.3547657697973117,
506
+ "train_runtime": 5270.9361,
507
+ "train_samples_per_second": 8.641,
508
+ "train_steps_per_second": 0.067
509
  }
510
  ],
511
  "logging_steps": 10,
512
+ "max_steps": 355,
513
  "num_train_epochs": 1,
514
+ "save_steps": 10000,
515
  "total_flos": 0.0,
516
  "trial_name": null,
517
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:200542098b43881df0df6dc0ff3056ca0236db5763f486bb392f305292932d2f
3
- size 5944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:773f035981526cb91d8da745ac00a062e7cff067bade23d7346497d28717689d
3
+ size 6648