wzhouad committed on
Commit
01f9e8f
1 Parent(s): 64a92e3

Model save

Browse files
README.md CHANGED
@@ -14,16 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
14
  # zephyr-7b-dpo-full
15
 
16
  This model was trained from scratch on the None dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 0.5054
19
- - Rewards/chosen: -1.3174
20
- - Rewards/rejected: -2.4481
21
- - Rewards/accuracies: 0.7773
22
- - Rewards/margins: 1.1307
23
- - Logps/rejected: -556.4355
24
- - Logps/chosen: -435.3261
25
- - Logits/rejected: -0.1274
26
- - Logits/chosen: -0.0846
27
 
28
  ## Model description
29
 
@@ -42,13 +32,13 @@ More information needed
42
  ### Training hyperparameters
43
 
44
  The following hyperparameters were used during training:
45
- - learning_rate: 3e-06
46
- - train_batch_size: 4
47
  - eval_batch_size: 8
48
- - seed: 3
49
  - distributed_type: multi-GPU
50
  - num_devices: 8
51
- - gradient_accumulation_steps: 4
52
  - total_train_batch_size: 128
53
  - total_eval_batch_size: 64
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -58,12 +48,6 @@ The following hyperparameters were used during training:
58
 
59
  ### Training results
60
 
61
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.5502 | 0.21 | 100 | 0.5705 | -1.1110 | -1.9243 | 0.7578 | 0.8133 | -504.0533 | -414.6870 | -0.1991 | -0.2234 |
64
- | 0.539 | 0.42 | 200 | 0.5325 | -1.2893 | -2.1763 | 0.7578 | 0.8870 | -529.2560 | -432.5124 | 0.1917 | 0.1517 |
65
- | 0.528 | 0.63 | 300 | 0.5161 | -1.4780 | -2.6138 | 0.7578 | 1.1358 | -573.0014 | -451.3821 | 0.1045 | 0.1296 |
66
- | 0.5049 | 0.84 | 400 | 0.5054 | -1.3174 | -2.4481 | 0.7773 | 1.1307 | -556.4355 | -435.3261 | -0.1274 | -0.0846 |
67
 
68
 
69
  ### Framework versions
 
14
  # zephyr-7b-dpo-full
15
 
16
  This model was trained from scratch on the None dataset.
 
 
 
 
 
 
 
 
 
 
17
 
18
  ## Model description
19
 
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
+ - learning_rate: 1e-06
36
+ - train_batch_size: 2
37
  - eval_batch_size: 8
38
+ - seed: 1
39
  - distributed_type: multi-GPU
40
  - num_devices: 8
41
+ - gradient_accumulation_steps: 8
42
  - total_train_batch_size: 128
43
  - total_eval_batch_size: 64
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
48
 
49
  ### Training results
50
 
 
 
 
 
 
 
51
 
52
 
53
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5426262839535247,
4
- "train_runtime": 4328.2164,
5
- "train_samples": 61134,
6
- "train_samples_per_second": 14.125,
7
- "train_steps_per_second": 0.11
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.464236611379704,
4
+ "train_runtime": 5271.2295,
5
+ "train_samples": 45548,
6
+ "train_samples_per_second": 8.641,
7
+ "train_steps_per_second": 0.067
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1ad713d321f72b4f6dee955bf279416bc9a375ca6b8ee6bd1648ec8f26dc08d
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:366bb9560cc47efbca54034565f6513238203e5c0f566fddc150a9d3e6085bfa
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ab3eb1ce23074d25dd5354b0630fc90dcb29f8a44bd558e73222da30d71e1b7
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2120eb8af6751d1817786193d0d4ef3daf4da774c7049a6883347a4245178ff
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:951657e3c35c06e4d339cd40ffbd2fb90d4fa240970e6a9a05e6beede4607a5a
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6e8c563ecda5484c9528a79ef74141bcc4fc0c565785c2d6adf764a77545114
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52adaaebdf92d341736955fe50fe4178cf74714cb5d47cf7ec73b6ef772497db
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7239e159560eaf7980449da5998b76d7130edd7090b3808a7b04eeed33600e
3
  size 1168138808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.5426262839535247,
4
- "train_runtime": 4328.2164,
5
- "train_samples": 61134,
6
- "train_samples_per_second": 14.125,
7
- "train_steps_per_second": 0.11
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.464236611379704,
4
+ "train_runtime": 5271.2295,
5
+ "train_samples": 45548,
6
+ "train_samples_per_second": 8.641,
7
+ "train_steps_per_second": 0.067
8
  }
trainer_state.json CHANGED
@@ -1,749 +1,517 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9984301412872841,
5
- "eval_steps": 100,
6
- "global_step": 477,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.02,
13
- "learning_rate": 6.25e-07,
14
- "logits/chosen": 0.2209470570087433,
15
- "logits/rejected": 0.3076450824737549,
16
- "logps/chosen": -324.3524169921875,
17
- "logps/rejected": -285.35931396484375,
18
- "loss": 0.6932,
19
- "rewards/accuracies": 0.45625001192092896,
20
- "rewards/chosen": 0.0010343596804887056,
21
- "rewards/margins": 0.0006591519340872765,
22
- "rewards/rejected": 0.0003752077464014292,
23
  "step": 10
24
  },
25
  {
26
- "epoch": 0.04,
27
- "learning_rate": 1.25e-06,
28
- "logits/chosen": 0.3226737976074219,
29
- "logits/rejected": 0.4124608039855957,
30
- "logps/chosen": -323.83349609375,
31
- "logps/rejected": -290.1531677246094,
32
- "loss": 0.6877,
33
- "rewards/accuracies": 0.6312500238418579,
34
- "rewards/chosen": -0.0006101070903241634,
35
- "rewards/margins": 0.011370119638741016,
36
- "rewards/rejected": -0.011980227194726467,
37
  "step": 20
38
  },
39
  {
40
- "epoch": 0.06,
41
- "learning_rate": 1.875e-06,
42
- "logits/chosen": 0.37067800760269165,
43
- "logits/rejected": 0.4101516604423523,
44
- "logps/chosen": -312.599609375,
45
- "logps/rejected": -317.5506896972656,
46
- "loss": 0.6643,
47
- "rewards/accuracies": 0.6812499761581421,
48
- "rewards/chosen": -0.022324323654174805,
49
- "rewards/margins": 0.08467327058315277,
50
- "rewards/rejected": -0.10699759423732758,
51
  "step": 30
52
  },
53
  {
54
- "epoch": 0.08,
55
- "learning_rate": 2.5e-06,
56
- "logits/chosen": 0.3572315275669098,
57
- "logits/rejected": 0.4867202639579773,
58
- "logps/chosen": -369.9970703125,
59
- "logps/rejected": -357.2043762207031,
60
- "loss": 0.6422,
61
- "rewards/accuracies": 0.6499999761581421,
62
- "rewards/chosen": -0.2718835771083832,
63
- "rewards/margins": 0.19485555589199066,
64
- "rewards/rejected": -0.46673911809921265,
65
  "step": 40
66
  },
67
  {
68
- "epoch": 0.1,
69
- "learning_rate": 2.999839121261416e-06,
70
- "logits/chosen": 0.02461487613618374,
71
- "logits/rejected": 0.1311604082584381,
72
- "logps/chosen": -397.81427001953125,
73
- "logps/rejected": -374.7628173828125,
74
- "loss": 0.6205,
75
- "rewards/accuracies": 0.6187499761581421,
76
- "rewards/chosen": -0.30967187881469727,
77
- "rewards/margins": 0.26407095789909363,
78
- "rewards/rejected": -0.5737428069114685,
79
  "step": 50
80
  },
81
  {
82
- "epoch": 0.13,
83
- "learning_rate": 2.994211988057582e-06,
84
- "logits/chosen": -0.08134393393993378,
85
- "logits/rejected": -0.0017544396687299013,
86
- "logps/chosen": -334.15093994140625,
87
- "logps/rejected": -356.8257141113281,
88
- "loss": 0.5972,
89
- "rewards/accuracies": 0.706250011920929,
90
- "rewards/chosen": -0.4183749258518219,
91
- "rewards/margins": 0.3044845461845398,
92
- "rewards/rejected": -0.7228595018386841,
93
  "step": 60
94
  },
95
  {
96
- "epoch": 0.15,
97
- "learning_rate": 2.9805753939568693e-06,
98
- "logits/chosen": -0.12287361919879913,
99
- "logits/rejected": -0.03368464112281799,
100
- "logps/chosen": -365.60296630859375,
101
- "logps/rejected": -404.632568359375,
102
- "loss": 0.5902,
103
- "rewards/accuracies": 0.71875,
104
- "rewards/chosen": -0.6079816818237305,
105
- "rewards/margins": 0.4893825948238373,
106
- "rewards/rejected": -1.0973644256591797,
107
  "step": 70
108
  },
109
  {
110
- "epoch": 0.17,
111
- "learning_rate": 2.959002435526626e-06,
112
- "logits/chosen": -0.17652544379234314,
113
- "logits/rejected": -0.11289135366678238,
114
- "logps/chosen": -397.5993957519531,
115
- "logps/rejected": -454.41571044921875,
116
- "loss": 0.5711,
117
- "rewards/accuracies": 0.7124999761581421,
118
- "rewards/chosen": -0.8409433364868164,
119
- "rewards/margins": 0.5162743926048279,
120
- "rewards/rejected": -1.357217788696289,
121
  "step": 80
122
  },
123
  {
124
- "epoch": 0.19,
125
- "learning_rate": 2.929608750821129e-06,
126
- "logits/chosen": -0.3760049641132355,
127
- "logits/rejected": -0.28880029916763306,
128
- "logps/chosen": -430.817138671875,
129
- "logps/rejected": -447.759033203125,
130
- "loss": 0.5512,
131
- "rewards/accuracies": 0.699999988079071,
132
- "rewards/chosen": -0.9714916944503784,
133
- "rewards/margins": 0.6265453696250916,
134
- "rewards/rejected": -1.5980370044708252,
135
  "step": 90
136
  },
137
  {
138
- "epoch": 0.21,
139
- "learning_rate": 2.892551899524109e-06,
140
- "logits/chosen": -0.30831730365753174,
141
- "logits/rejected": -0.2624167501926422,
142
- "logps/chosen": -448.5101623535156,
143
- "logps/rejected": -532.5578002929688,
144
- "loss": 0.5502,
145
- "rewards/accuracies": 0.6875,
146
- "rewards/chosen": -1.1964621543884277,
147
- "rewards/margins": 0.740709662437439,
148
- "rewards/rejected": -1.9371719360351562,
149
  "step": 100
150
  },
151
  {
152
- "epoch": 0.21,
153
- "eval_logits/chosen": -0.22339792549610138,
154
- "eval_logits/rejected": -0.1990877389907837,
155
- "eval_logps/chosen": -414.68701171875,
156
- "eval_logps/rejected": -504.0533142089844,
157
- "eval_loss": 0.5704939365386963,
158
- "eval_rewards/accuracies": 0.7578125,
159
- "eval_rewards/chosen": -1.1110235452651978,
160
- "eval_rewards/margins": 0.8132585287094116,
161
- "eval_rewards/rejected": -1.9242819547653198,
162
- "eval_runtime": 64.9663,
163
- "eval_samples_per_second": 30.785,
164
- "eval_steps_per_second": 0.493,
165
- "step": 100
166
- },
167
- {
168
- "epoch": 0.23,
169
- "learning_rate": 2.848030518377739e-06,
170
- "logits/chosen": -0.23117919266223907,
171
- "logits/rejected": -0.1292828470468521,
172
- "logps/chosen": -420.55633544921875,
173
- "logps/rejected": -438.8388671875,
174
- "loss": 0.5707,
175
- "rewards/accuracies": 0.731249988079071,
176
- "rewards/chosen": -0.9971814155578613,
177
- "rewards/margins": 0.49195510149002075,
178
- "rewards/rejected": -1.4891364574432373,
179
  "step": 110
180
  },
181
  {
182
- "epoch": 0.25,
183
- "learning_rate": 2.7962832564252724e-06,
184
- "logits/chosen": -0.19192993640899658,
185
- "logits/rejected": -0.07996977120637894,
186
- "logps/chosen": -423.1610412597656,
187
- "logps/rejected": -425.53839111328125,
188
- "loss": 0.5379,
189
- "rewards/accuracies": 0.7875000238418579,
190
- "rewards/chosen": -0.8748111724853516,
191
- "rewards/margins": 0.6682790517807007,
192
- "rewards/rejected": -1.5430901050567627,
193
  "step": 120
194
  },
195
  {
196
- "epoch": 0.27,
197
- "learning_rate": 2.7375874957747644e-06,
198
- "logits/chosen": -0.1726234257221222,
199
- "logits/rejected": -0.08887670934200287,
200
- "logps/chosen": -431.18365478515625,
201
- "logps/rejected": -499.1405334472656,
202
- "loss": 0.5636,
203
- "rewards/accuracies": 0.75,
204
- "rewards/chosen": -1.0607411861419678,
205
- "rewards/margins": 0.934187114238739,
206
- "rewards/rejected": -1.9949283599853516,
207
  "step": 130
208
  },
209
  {
210
- "epoch": 0.29,
211
- "learning_rate": 2.672257864741005e-06,
212
- "logits/chosen": -0.15248467028141022,
213
- "logits/rejected": -0.09780655056238174,
214
- "logps/chosen": -401.5557556152344,
215
- "logps/rejected": -453.06341552734375,
216
- "loss": 0.5422,
217
- "rewards/accuracies": 0.731249988079071,
218
- "rewards/chosen": -0.9503288269042969,
219
- "rewards/margins": 0.6491050124168396,
220
- "rewards/rejected": -1.5994337797164917,
221
  "step": 140
222
  },
223
  {
224
- "epoch": 0.31,
225
- "learning_rate": 2.600644551335706e-06,
226
- "logits/chosen": -0.06402697414159775,
227
- "logits/rejected": -0.022289589047431946,
228
- "logps/chosen": -436.34893798828125,
229
- "logps/rejected": -519.2093505859375,
230
- "loss": 0.5418,
231
- "rewards/accuracies": 0.78125,
232
- "rewards/chosen": -1.1614066362380981,
233
- "rewards/margins": 1.014080286026001,
234
- "rewards/rejected": -2.1754870414733887,
235
  "step": 150
236
  },
237
  {
238
- "epoch": 0.33,
239
- "learning_rate": 2.5231314261461732e-06,
240
- "logits/chosen": -0.13501138985157013,
241
- "logits/rejected": -0.03512698411941528,
242
- "logps/chosen": -405.4765319824219,
243
- "logps/rejected": -474.7965393066406,
244
- "loss": 0.5315,
245
- "rewards/accuracies": 0.762499988079071,
246
- "rewards/chosen": -1.0841046571731567,
247
- "rewards/margins": 0.7699181437492371,
248
- "rewards/rejected": -1.854022741317749,
249
  "step": 160
250
  },
251
  {
252
- "epoch": 0.36,
253
- "learning_rate": 2.440133984664454e-06,
254
- "logits/chosen": -0.11433794349431992,
255
- "logits/rejected": -0.04908312112092972,
256
- "logps/chosen": -425.5057678222656,
257
- "logps/rejected": -513.4010009765625,
258
- "loss": 0.5526,
259
- "rewards/accuracies": 0.65625,
260
- "rewards/chosen": -1.2522752285003662,
261
- "rewards/margins": 0.9037529230117798,
262
- "rewards/rejected": -2.1560280323028564,
263
  "step": 170
264
  },
265
  {
266
- "epoch": 0.38,
267
- "learning_rate": 2.3520971200967337e-06,
268
- "logits/chosen": -0.2898910641670227,
269
- "logits/rejected": -0.11892978847026825,
270
- "logps/chosen": -461.5921936035156,
271
- "logps/rejected": -437.5884704589844,
272
- "loss": 0.5491,
273
- "rewards/accuracies": 0.706250011920929,
274
- "rewards/chosen": -1.0226460695266724,
275
- "rewards/margins": 0.5135943293571472,
276
- "rewards/rejected": -1.5362403392791748,
277
  "step": 180
278
  },
279
  {
280
- "epoch": 0.4,
281
- "learning_rate": 2.2594927385914546e-06,
282
- "logits/chosen": -0.11167088896036148,
283
- "logits/rejected": -0.024417612701654434,
284
- "logps/chosen": -459.20880126953125,
285
- "logps/rejected": -484.350341796875,
286
- "loss": 0.537,
287
- "rewards/accuracies": 0.737500011920929,
288
- "rewards/chosen": -1.387242078781128,
289
- "rewards/margins": 0.6068987250328064,
290
- "rewards/rejected": -1.994140863418579,
291
  "step": 190
292
  },
293
  {
294
- "epoch": 0.42,
295
- "learning_rate": 2.1628172296692954e-06,
296
- "logits/chosen": 0.08049922436475754,
297
- "logits/rejected": 0.168174147605896,
298
- "logps/chosen": -408.86431884765625,
299
- "logps/rejected": -484.8013610839844,
300
- "loss": 0.539,
301
- "rewards/accuracies": 0.737500011920929,
302
- "rewards/chosen": -1.2882415056228638,
303
- "rewards/margins": 0.807357668876648,
304
- "rewards/rejected": -2.095599412918091,
305
  "step": 200
306
  },
307
  {
308
- "epoch": 0.42,
309
- "eval_logits/chosen": 0.15173441171646118,
310
- "eval_logits/rejected": 0.19174005091190338,
311
- "eval_logps/chosen": -432.51239013671875,
312
- "eval_logps/rejected": -529.2559814453125,
313
- "eval_loss": 0.5325208306312561,
314
- "eval_rewards/accuracies": 0.7578125,
315
- "eval_rewards/chosen": -1.289277195930481,
316
- "eval_rewards/margins": 0.8870314955711365,
317
- "eval_rewards/rejected": -2.1763086318969727,
318
- "eval_runtime": 64.8606,
319
- "eval_samples_per_second": 30.835,
320
- "eval_steps_per_second": 0.493,
321
- "step": 200
322
- },
323
- {
324
- "epoch": 0.44,
325
- "learning_rate": 2.062588805414343e-06,
326
- "logits/chosen": 0.2150273621082306,
327
- "logits/rejected": 0.3071494400501251,
328
- "logps/chosen": -429.50146484375,
329
- "logps/rejected": -501.4217834472656,
330
- "loss": 0.5217,
331
- "rewards/accuracies": 0.7124999761581421,
332
- "rewards/chosen": -1.4701905250549316,
333
- "rewards/margins": 0.7591425776481628,
334
- "rewards/rejected": -2.2293331623077393,
335
  "step": 210
336
  },
337
  {
338
- "epoch": 0.46,
339
- "learning_rate": 1.9593447226892386e-06,
340
- "logits/chosen": 0.1677493005990982,
341
- "logits/rejected": 0.25236162543296814,
342
- "logps/chosen": -453.639892578125,
343
- "logps/rejected": -529.4569091796875,
344
- "loss": 0.5335,
345
- "rewards/accuracies": 0.7437499761581421,
346
- "rewards/chosen": -1.4858437776565552,
347
- "rewards/margins": 0.8870512247085571,
348
- "rewards/rejected": -2.372894763946533,
349
  "step": 220
350
  },
351
  {
352
- "epoch": 0.48,
353
- "learning_rate": 1.853638403264141e-06,
354
- "logits/chosen": 0.18635836243629456,
355
- "logits/rejected": 0.2622009217739105,
356
- "logps/chosen": -424.3346252441406,
357
- "logps/rejected": -484.9342346191406,
358
- "loss": 0.5094,
359
- "rewards/accuracies": 0.737500011920929,
360
- "rewards/chosen": -1.2930189371109009,
361
- "rewards/margins": 0.7800495028495789,
362
- "rewards/rejected": -2.073068380355835,
363
  "step": 230
364
  },
365
  {
366
- "epoch": 0.5,
367
- "learning_rate": 1.7460364672965328e-06,
368
- "logits/chosen": 0.06422718614339828,
369
- "logits/rejected": 0.0699523538351059,
370
- "logps/chosen": -472.2220153808594,
371
- "logps/rejected": -553.8200073242188,
372
- "loss": 0.5098,
373
- "rewards/accuracies": 0.6937500238418579,
374
- "rewards/chosen": -1.5787314176559448,
375
- "rewards/margins": 0.7840873003005981,
376
- "rewards/rejected": -2.362818717956543,
377
  "step": 240
378
  },
379
  {
380
- "epoch": 0.52,
381
- "learning_rate": 1.637115696063402e-06,
382
- "logits/chosen": 0.09498562663793564,
383
- "logits/rejected": 0.23446598649024963,
384
- "logps/chosen": -436.4678649902344,
385
- "logps/rejected": -487.33929443359375,
386
- "loss": 0.5396,
387
- "rewards/accuracies": 0.7875000238418579,
388
- "rewards/chosen": -1.1849793195724487,
389
- "rewards/margins": 0.8154090642929077,
390
- "rewards/rejected": -2.0003883838653564,
391
  "step": 250
392
  },
393
  {
394
- "epoch": 0.54,
395
- "learning_rate": 1.5274599402265162e-06,
396
- "logits/chosen": 0.08814150094985962,
397
- "logits/rejected": 0.16783395409584045,
398
- "logps/chosen": -431.7601623535156,
399
- "logps/rejected": -454.37799072265625,
400
- "loss": 0.5208,
401
- "rewards/accuracies": 0.6499999761581421,
402
- "rewards/chosen": -1.176255464553833,
403
- "rewards/margins": 0.7015119791030884,
404
- "rewards/rejected": -1.877767562866211,
405
  "step": 260
406
  },
407
  {
408
- "epoch": 0.57,
409
- "learning_rate": 1.4176569902035088e-06,
410
- "logits/chosen": 0.013920878991484642,
411
- "logits/rejected": -0.04539678990840912,
412
- "logps/chosen": -459.3199157714844,
413
- "logps/rejected": -532.9890747070312,
414
- "loss": 0.5266,
415
- "rewards/accuracies": 0.731249988079071,
416
- "rewards/chosen": -1.3778207302093506,
417
- "rewards/margins": 0.8610237240791321,
418
- "rewards/rejected": -2.238844633102417,
419
  "step": 270
420
  },
421
  {
422
- "epoch": 0.59,
423
- "learning_rate": 1.308295425420593e-06,
424
- "logits/chosen": 0.09688195586204529,
425
- "logits/rejected": 0.10716281831264496,
426
- "logps/chosen": -416.7201232910156,
427
- "logps/rejected": -503.9598083496094,
428
- "loss": 0.5308,
429
- "rewards/accuracies": 0.706250011920929,
430
- "rewards/chosen": -1.1698511838912964,
431
- "rewards/margins": 0.8221219778060913,
432
- "rewards/rejected": -1.9919731616973877,
433
  "step": 280
434
  },
435
  {
436
- "epoch": 0.61,
437
- "learning_rate": 1.1999614593359337e-06,
438
- "logits/chosen": 0.04211825877428055,
439
- "logits/rejected": 0.12247484922409058,
440
- "logps/chosen": -450.50439453125,
441
- "logps/rejected": -509.80059814453125,
442
- "loss": 0.4921,
443
- "rewards/accuracies": 0.737500011920929,
444
- "rewards/chosen": -0.9760421514511108,
445
- "rewards/margins": 0.8807096481323242,
446
- "rewards/rejected": -1.856751799583435,
447
  "step": 290
448
  },
449
  {
450
- "epoch": 0.63,
451
- "learning_rate": 1.0932357971453745e-06,
452
- "logits/chosen": 0.1820950210094452,
453
- "logits/rejected": 0.16093513369560242,
454
- "logps/chosen": -430.36566162109375,
455
- "logps/rejected": -520.7562866210938,
456
- "loss": 0.528,
457
- "rewards/accuracies": 0.706250011920929,
458
- "rewards/chosen": -1.4484156370162964,
459
- "rewards/margins": 0.7810145020484924,
460
- "rewards/rejected": -2.2294304370880127,
461
- "step": 300
462
- },
463
- {
464
- "epoch": 0.63,
465
- "eval_logits/chosen": 0.12960398197174072,
466
- "eval_logits/rejected": 0.10445590317249298,
467
- "eval_logps/chosen": -451.38214111328125,
468
- "eval_logps/rejected": -573.0014038085938,
469
- "eval_loss": 0.5161482691764832,
470
- "eval_rewards/accuracies": 0.7578125,
471
- "eval_rewards/chosen": -1.4779750108718872,
472
- "eval_rewards/margins": 1.135788083076477,
473
- "eval_rewards/rejected": -2.6137630939483643,
474
- "eval_runtime": 65.1979,
475
- "eval_samples_per_second": 30.676,
476
- "eval_steps_per_second": 0.491,
477
  "step": 300
478
  },
479
  {
480
- "epoch": 0.65,
481
- "learning_rate": 9.886905230142433e-07,
482
- "logits/chosen": -0.021408915519714355,
483
- "logits/rejected": -0.032668907195329666,
484
- "logps/chosen": -442.490478515625,
485
- "logps/rejected": -543.8944091796875,
486
- "loss": 0.4964,
487
- "rewards/accuracies": 0.8062499761581421,
488
- "rewards/chosen": -1.465606927871704,
489
- "rewards/margins": 1.0980336666107178,
490
- "rewards/rejected": -2.563640594482422,
491
  "step": 310
492
  },
493
  {
494
- "epoch": 0.67,
495
- "learning_rate": 8.868860335206678e-07,
496
- "logits/chosen": -0.20789632201194763,
497
- "logits/rejected": -0.17585983872413635,
498
- "logps/chosen": -477.2587890625,
499
- "logps/rejected": -549.9071044921875,
500
- "loss": 0.5025,
501
- "rewards/accuracies": 0.7124999761581421,
502
- "rewards/chosen": -1.500857949256897,
503
- "rewards/margins": 0.9439705014228821,
504
- "rewards/rejected": -2.4448282718658447,
505
  "step": 320
506
  },
507
  {
508
- "epoch": 0.69,
509
- "learning_rate": 7.883680337481599e-07,
510
- "logits/chosen": -0.021555980667471886,
511
- "logits/rejected": -0.07717858999967575,
512
- "logps/chosen": -488.15985107421875,
513
- "logps/rejected": -586.7195434570312,
514
- "loss": 0.5155,
515
- "rewards/accuracies": 0.71875,
516
- "rewards/chosen": -1.5513451099395752,
517
- "rewards/margins": 0.9236253499984741,
518
- "rewards/rejected": -2.4749703407287598,
519
  "step": 330
520
  },
521
- {
522
- "epoch": 0.71,
523
- "learning_rate": 6.936646121293654e-07,
524
- "logits/chosen": -0.0836385041475296,
525
- "logits/rejected": -0.039361923933029175,
526
- "logps/chosen": -532.11181640625,
527
- "logps/rejected": -552.9940185546875,
528
- "loss": 0.5255,
529
- "rewards/accuracies": 0.6625000238418579,
530
- "rewards/chosen": -1.5922324657440186,
531
- "rewards/margins": 0.7127285003662109,
532
- "rewards/rejected": -2.3049609661102295,
533
- "step": 340
534
- },
535
- {
536
- "epoch": 0.73,
537
- "learning_rate": 6.032834097207889e-07,
538
- "logits/chosen": 0.012007070705294609,
539
- "logits/rejected": 0.023168018087744713,
540
- "logps/chosen": -392.9766845703125,
541
- "logps/rejected": -494.611328125,
542
- "loss": 0.5176,
543
- "rewards/accuracies": 0.793749988079071,
544
- "rewards/chosen": -1.1996792554855347,
545
- "rewards/margins": 0.8586348295211792,
546
- "rewards/rejected": -2.058314085006714,
547
- "step": 350
548
- },
549
- {
550
- "epoch": 0.75,
551
- "learning_rate": 5.177088990820725e-07,
552
- "logits/chosen": -0.050304025411605835,
553
- "logits/rejected": -0.019231608137488365,
554
- "logps/chosen": -416.4642639160156,
555
- "logps/rejected": -483.6036071777344,
556
- "loss": 0.5229,
557
- "rewards/accuracies": 0.737500011920929,
558
- "rewards/chosen": -1.2121403217315674,
559
- "rewards/margins": 0.8643258810043335,
560
- "rewards/rejected": -2.0764665603637695,
561
- "step": 360
562
- },
563
- {
564
- "epoch": 0.77,
565
- "learning_rate": 4.3739978734594494e-07,
566
- "logits/chosen": -0.08456006646156311,
567
- "logits/rejected": -0.12612244486808777,
568
- "logps/chosen": -400.55206298828125,
569
- "logps/rejected": -503.98419189453125,
570
- "loss": 0.491,
571
- "rewards/accuracies": 0.7562500238418579,
572
- "rewards/chosen": -1.2748231887817383,
573
- "rewards/margins": 0.9540689587593079,
574
- "rewards/rejected": -2.2288920879364014,
575
- "step": 370
576
- },
577
- {
578
- "epoch": 0.8,
579
- "learning_rate": 3.627865573992087e-07,
580
- "logits/chosen": -0.15401089191436768,
581
- "logits/rejected": -0.20363488793373108,
582
- "logps/chosen": -478.338623046875,
583
- "logps/rejected": -561.3379516601562,
584
- "loss": 0.5324,
585
- "rewards/accuracies": 0.7124999761581421,
586
- "rewards/chosen": -1.4220519065856934,
587
- "rewards/margins": 1.054446816444397,
588
- "rewards/rejected": -2.4764983654022217,
589
- "step": 380
590
- },
591
- {
592
- "epoch": 0.82,
593
- "learning_rate": 2.9426916035484166e-07,
594
- "logits/chosen": -0.21970228850841522,
595
- "logits/rejected": -0.12131069600582123,
596
- "logps/chosen": -452.88702392578125,
597
- "logps/rejected": -512.7882690429688,
598
- "loss": 0.4985,
599
- "rewards/accuracies": 0.737500011920929,
600
- "rewards/chosen": -1.302075743675232,
601
- "rewards/margins": 0.9814373254776001,
602
- "rewards/rejected": -2.2835135459899902,
603
- "step": 390
604
- },
605
- {
606
- "epoch": 0.84,
607
- "learning_rate": 2.322148716843081e-07,
608
- "logits/chosen": -0.10807213932275772,
609
- "logits/rejected": -0.1505809724330902,
610
- "logps/chosen": -412.93658447265625,
611
- "logps/rejected": -500.96142578125,
612
- "loss": 0.5049,
613
- "rewards/accuracies": 0.7437499761581421,
614
- "rewards/chosen": -1.183068037033081,
615
- "rewards/margins": 0.9983505010604858,
616
- "rewards/rejected": -2.1814186573028564,
617
- "step": 400
618
- },
619
- {
620
- "epoch": 0.84,
621
- "eval_logits/chosen": -0.08459039032459259,
622
- "eval_logits/rejected": -0.12737514078617096,
623
- "eval_logps/chosen": -435.32611083984375,
624
- "eval_logps/rejected": -556.4354858398438,
625
- "eval_loss": 0.5054404735565186,
626
- "eval_rewards/accuracies": 0.77734375,
627
- "eval_rewards/chosen": -1.3174140453338623,
628
- "eval_rewards/margins": 1.1306906938552856,
629
- "eval_rewards/rejected": -2.4481046199798584,
630
- "eval_runtime": 63.6745,
631
- "eval_samples_per_second": 31.41,
632
- "eval_steps_per_second": 0.503,
633
- "step": 400
634
- },
635
- {
636
- "epoch": 0.86,
637
- "learning_rate": 1.7695632250191002e-07,
638
- "logits/chosen": -0.10287277400493622,
639
- "logits/rejected": -0.09486471116542816,
640
- "logps/chosen": -460.506103515625,
641
- "logps/rejected": -528.74365234375,
642
- "loss": 0.5085,
643
- "rewards/accuracies": 0.731249988079071,
644
- "rewards/chosen": -1.3777108192443848,
645
- "rewards/margins": 0.9177919626235962,
646
- "rewards/rejected": -2.2955029010772705,
647
- "step": 410
648
- },
649
- {
650
- "epoch": 0.88,
651
- "learning_rate": 1.2878971655412515e-07,
652
- "logits/chosen": -0.18269723653793335,
653
- "logits/rejected": -0.24112820625305176,
654
- "logps/chosen": -422.0091857910156,
655
- "logps/rejected": -485.4613342285156,
656
- "loss": 0.5023,
657
- "rewards/accuracies": 0.706250011920929,
658
- "rewards/chosen": -1.350635290145874,
659
- "rewards/margins": 0.7604650259017944,
660
- "rewards/rejected": -2.1110999584198,
661
- "step": 420
662
- },
663
- {
664
- "epoch": 0.9,
665
- "learning_rate": 8.797324247145411e-08,
666
- "logits/chosen": -0.24929451942443848,
667
- "logits/rejected": -0.2101927250623703,
668
- "logps/chosen": -461.18609619140625,
669
- "logps/rejected": -513.12841796875,
670
- "loss": 0.498,
671
- "rewards/accuracies": 0.762499988079071,
672
- "rewards/chosen": -1.363139271736145,
673
- "rewards/margins": 0.9195320010185242,
674
- "rewards/rejected": -2.2826714515686035,
675
- "step": 430
676
- },
677
- {
678
- "epoch": 0.92,
679
- "learning_rate": 5.472568979361853e-08,
680
- "logits/chosen": -0.2508998513221741,
681
- "logits/rejected": -0.23314960300922394,
682
- "logps/chosen": -452.8556213378906,
683
- "logps/rejected": -510.5809020996094,
684
- "loss": 0.5278,
685
- "rewards/accuracies": 0.6875,
686
- "rewards/chosen": -1.4022619724273682,
687
- "rewards/margins": 0.9772977828979492,
688
- "rewards/rejected": -2.3795602321624756,
689
- "step": 440
690
- },
691
- {
692
- "epoch": 0.94,
693
- "learning_rate": 2.922527618666465e-08,
694
- "logits/chosen": -0.3062313199043274,
695
- "logits/rejected": -0.20936842262744904,
696
- "logps/chosen": -482.806884765625,
697
- "logps/rejected": -512.525390625,
698
- "loss": 0.5118,
699
- "rewards/accuracies": 0.6625000238418579,
700
- "rewards/chosen": -1.379194736480713,
701
- "rewards/margins": 0.6993511915206909,
702
- "rewards/rejected": -2.0785460472106934,
703
- "step": 450
704
- },
705
  {
706
  "epoch": 0.96,
707
- "learning_rate": 1.1608692138469379e-08,
708
- "logits/chosen": -0.1790022850036621,
709
- "logits/rejected": -0.2542126774787903,
710
- "logps/chosen": -446.405517578125,
711
- "logps/rejected": -562.390869140625,
712
- "loss": 0.5164,
713
- "rewards/accuracies": 0.737500011920929,
714
- "rewards/chosen": -1.2966983318328857,
715
- "rewards/margins": 1.0804041624069214,
716
- "rewards/rejected": -2.3771026134490967,
717
- "step": 460
718
  },
719
  {
720
  "epoch": 0.98,
721
- "learning_rate": 1.970368253390198e-09,
722
- "logits/chosen": -0.26300907135009766,
723
- "logits/rejected": -0.3493719696998596,
724
- "logps/chosen": -444.3451232910156,
725
- "logps/rejected": -539.7431030273438,
726
- "loss": 0.5011,
727
- "rewards/accuracies": 0.737500011920929,
728
- "rewards/chosen": -1.3199527263641357,
729
- "rewards/margins": 1.0216041803359985,
730
- "rewards/rejected": -2.341557025909424,
731
- "step": 470
732
  },
733
  {
734
  "epoch": 1.0,
735
- "step": 477,
736
  "total_flos": 0.0,
737
- "train_loss": 0.5426262839535247,
738
- "train_runtime": 4328.2164,
739
- "train_samples_per_second": 14.125,
740
- "train_steps_per_second": 0.11
741
  }
742
  ],
743
  "logging_steps": 10,
744
- "max_steps": 477,
745
  "num_train_epochs": 1,
746
- "save_steps": 1000,
747
  "total_flos": 0.0,
748
  "trial_name": null,
749
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9975412715138743,
5
+ "eval_steps": 10000,
6
+ "global_step": 355,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03,
13
+ "learning_rate": 2.7777777777777776e-07,
14
+ "logits/chosen": -0.08267850428819656,
15
+ "logits/rejected": -0.0387466736137867,
16
+ "logps/chosen": -327.2626037597656,
17
+ "logps/rejected": -244.530517578125,
18
+ "loss": 0.5075,
19
+ "rewards/accuracies": 0.375,
20
+ "rewards/chosen": -0.0007698397384956479,
21
+ "rewards/margins": 0.0003367254394106567,
22
+ "rewards/rejected": -0.0011065651196986437,
23
  "step": 10
24
  },
25
  {
26
+ "epoch": 0.06,
27
+ "learning_rate": 5.555555555555555e-07,
28
+ "logits/chosen": -0.05577896907925606,
29
+ "logits/rejected": -0.02608281373977661,
30
+ "logps/chosen": -291.40679931640625,
31
+ "logps/rejected": -195.19332885742188,
32
+ "loss": 0.5124,
33
+ "rewards/accuracies": 0.574999988079071,
34
+ "rewards/chosen": -0.0003207808767911047,
35
+ "rewards/margins": 0.007293092552572489,
36
+ "rewards/rejected": -0.007613874040544033,
37
  "step": 20
38
  },
39
  {
40
+ "epoch": 0.08,
41
+ "learning_rate": 8.333333333333333e-07,
42
+ "logits/chosen": -0.019027356058359146,
43
+ "logits/rejected": 0.024959497153759003,
44
+ "logps/chosen": -348.4835205078125,
45
+ "logps/rejected": -209.26522827148438,
46
+ "loss": 0.5167,
47
+ "rewards/accuracies": 0.6312500238418579,
48
+ "rewards/chosen": 0.006022198125720024,
49
+ "rewards/margins": 0.05245554447174072,
50
+ "rewards/rejected": -0.046433351933956146,
51
  "step": 30
52
  },
53
  {
54
+ "epoch": 0.11,
55
+ "learning_rate": 9.99612097830993e-07,
56
+ "logits/chosen": -0.012003236450254917,
57
+ "logits/rejected": 0.02850813791155815,
58
+ "logps/chosen": -310.19439697265625,
59
+ "logps/rejected": -234.8997039794922,
60
+ "loss": 0.5424,
61
+ "rewards/accuracies": 0.5687500238418579,
62
+ "rewards/chosen": -0.06786860525608063,
63
+ "rewards/margins": 0.05475381761789322,
64
+ "rewards/rejected": -0.12262241542339325,
65
  "step": 40
66
  },
67
  {
68
+ "epoch": 0.14,
69
+ "learning_rate": 9.952551076085863e-07,
70
+ "logits/chosen": 0.0373210608959198,
71
+ "logits/rejected": 0.07148866355419159,
72
+ "logps/chosen": -325.3377990722656,
73
+ "logps/rejected": -265.7305908203125,
74
+ "loss": 0.5454,
75
+ "rewards/accuracies": 0.625,
76
+ "rewards/chosen": -0.09839601814746857,
77
+ "rewards/margins": 0.1340223103761673,
78
+ "rewards/rejected": -0.23241834342479706,
79
  "step": 50
80
  },
81
  {
82
+ "epoch": 0.17,
83
+ "learning_rate": 9.860986139994238e-07,
84
+ "logits/chosen": -0.04613853245973587,
85
+ "logits/rejected": 0.015464186668395996,
86
+ "logps/chosen": -391.8591003417969,
87
+ "logps/rejected": -237.02279663085938,
88
+ "loss": 0.5384,
89
+ "rewards/accuracies": 0.6625000238418579,
90
+ "rewards/chosen": -0.04945594444870949,
91
+ "rewards/margins": 0.2955462634563446,
92
+ "rewards/rejected": -0.3450022041797638,
93
  "step": 60
94
  },
95
  {
96
+ "epoch": 0.2,
97
+ "learning_rate": 9.722313523268027e-07,
98
+ "logits/chosen": -0.05867184326052666,
99
+ "logits/rejected": 0.02377297915518284,
100
+ "logps/chosen": -370.95343017578125,
101
+ "logps/rejected": -244.0323028564453,
102
+ "loss": 0.5278,
103
+ "rewards/accuracies": 0.59375,
104
+ "rewards/chosen": -0.024297554045915604,
105
+ "rewards/margins": 0.19173592329025269,
106
+ "rewards/rejected": -0.21603348851203918,
107
  "step": 70
108
  },
109
  {
110
+ "epoch": 0.22,
111
+ "learning_rate": 9.537877098354784e-07,
112
+ "logits/chosen": -0.010928474366664886,
113
+ "logits/rejected": -0.0014367073308676481,
114
+ "logps/chosen": -264.022216796875,
115
+ "logps/rejected": -202.35671997070312,
116
+ "loss": 0.5297,
117
+ "rewards/accuracies": 0.6499999761581421,
118
+ "rewards/chosen": -0.04622369259595871,
119
+ "rewards/margins": 0.17698831856250763,
120
+ "rewards/rejected": -0.22321197390556335,
121
  "step": 80
122
  },
123
  {
124
+ "epoch": 0.25,
125
+ "learning_rate": 9.309464233486386e-07,
126
+ "logits/chosen": -0.10472371429204941,
127
+ "logits/rejected": -0.04422920569777489,
128
+ "logps/chosen": -364.49853515625,
129
+ "logps/rejected": -217.35791015625,
130
+ "loss": 0.516,
131
+ "rewards/accuracies": 0.6937500238418579,
132
+ "rewards/chosen": 0.05392575263977051,
133
+ "rewards/margins": 0.32242026925086975,
134
+ "rewards/rejected": -0.26849451661109924,
135
  "step": 90
136
  },
137
  {
138
+ "epoch": 0.28,
139
+ "learning_rate": 9.039288471343504e-07,
140
+ "logits/chosen": -0.06652472913265228,
141
+ "logits/rejected": -0.046227507293224335,
142
+ "logps/chosen": -343.7643127441406,
143
+ "logps/rejected": -256.42938232421875,
144
+ "loss": 0.5032,
145
+ "rewards/accuracies": 0.59375,
146
+ "rewards/chosen": -0.0055793882347643375,
147
+ "rewards/margins": 0.1420070379972458,
148
+ "rewards/rejected": -0.1475864201784134,
149
  "step": 100
150
  },
151
  {
152
+ "epoch": 0.31,
153
+ "learning_rate": 8.729968077675454e-07,
154
+ "logits/chosen": -0.14867696166038513,
155
+ "logits/rejected": -0.09129262715578079,
156
+ "logps/chosen": -295.33929443359375,
157
+ "logps/rejected": -247.62142944335938,
158
+ "loss": 0.4879,
159
+ "rewards/accuracies": 0.5375000238418579,
160
+ "rewards/chosen": -0.0303972028195858,
161
+ "rewards/margins": 0.09183444827795029,
162
+ "rewards/rejected": -0.12223164737224579,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  "step": 110
164
  },
165
  {
166
+ "epoch": 0.34,
167
+ "learning_rate": 8.384500667760089e-07,
168
+ "logits/chosen": -0.13717997074127197,
169
+ "logits/rejected": -0.10880477726459503,
170
+ "logps/chosen": -314.3236999511719,
171
+ "logps/rejected": -198.9403533935547,
172
+ "loss": 0.4789,
173
+ "rewards/accuracies": 0.6499999761581421,
174
+ "rewards/chosen": 0.03523362800478935,
175
+ "rewards/margins": 0.17541493475437164,
176
+ "rewards/rejected": -0.140181303024292,
177
  "step": 120
178
  },
179
  {
180
+ "epoch": 0.37,
181
+ "learning_rate": 8.006234156598042e-07,
182
+ "logits/chosen": -0.14056822657585144,
183
+ "logits/rejected": -0.09251274913549423,
184
+ "logps/chosen": -349.345458984375,
185
+ "logps/rejected": -211.16281127929688,
186
+ "loss": 0.478,
187
+ "rewards/accuracies": 0.637499988079071,
188
+ "rewards/chosen": 0.007185462862253189,
189
+ "rewards/margins": 0.2594950795173645,
190
+ "rewards/rejected": -0.2523096203804016,
191
  "step": 130
192
  },
193
  {
194
+ "epoch": 0.39,
195
+ "learning_rate": 7.59883431436215e-07,
196
+ "logits/chosen": -0.11560620367527008,
197
+ "logits/rejected": -0.09353138506412506,
198
+ "logps/chosen": -307.52716064453125,
199
+ "logps/rejected": -232.7269287109375,
200
+ "loss": 0.4694,
201
+ "rewards/accuracies": 0.643750011920929,
202
+ "rewards/chosen": -0.086158387362957,
203
+ "rewards/margins": 0.15163089334964752,
204
+ "rewards/rejected": -0.23778927326202393,
205
  "step": 140
206
  },
207
  {
208
+ "epoch": 0.42,
209
+ "learning_rate": 7.166249241521318e-07,
210
+ "logits/chosen": -0.09010031074285507,
211
+ "logits/rejected": -0.050861239433288574,
212
+ "logps/chosen": -286.13616943359375,
213
+ "logps/rejected": -242.67953491210938,
214
+ "loss": 0.4676,
215
+ "rewards/accuracies": 0.606249988079071,
216
+ "rewards/chosen": -0.16256649792194366,
217
+ "rewards/margins": 0.15163187682628632,
218
+ "rewards/rejected": -0.3141983449459076,
219
  "step": 150
220
  },
221
  {
222
+ "epoch": 0.45,
223
+ "learning_rate": 6.712671107909358e-07,
224
+ "logits/chosen": -0.18100903928279877,
225
+ "logits/rejected": -0.10084307193756104,
226
+ "logps/chosen": -358.4205322265625,
227
+ "logps/rejected": -247.5520782470703,
228
+ "loss": 0.4636,
229
+ "rewards/accuracies": 0.637499988079071,
230
+ "rewards/chosen": -0.09556841105222702,
231
+ "rewards/margins": 0.2233666479587555,
232
+ "rewards/rejected": -0.31893500685691833,
233
  "step": 160
234
  },
235
  {
236
+ "epoch": 0.48,
237
+ "learning_rate": 6.24249552652447e-07,
238
+ "logits/chosen": -0.1316126585006714,
239
+ "logits/rejected": -0.10074617713689804,
240
+ "logps/chosen": -309.5055236816406,
241
+ "logps/rejected": -261.95977783203125,
242
+ "loss": 0.4396,
243
+ "rewards/accuracies": 0.59375,
244
+ "rewards/chosen": -0.14085456728935242,
245
+ "rewards/margins": 0.18683212995529175,
246
+ "rewards/rejected": -0.32768669724464417,
247
  "step": 170
248
  },
249
  {
250
+ "epoch": 0.51,
251
+ "learning_rate": 5.760278955766694e-07,
252
+ "logits/chosen": -0.23117272555828094,
253
+ "logits/rejected": -0.16750793159008026,
254
+ "logps/chosen": -324.4317932128906,
255
+ "logps/rejected": -253.4479217529297,
256
+ "loss": 0.4428,
257
+ "rewards/accuracies": 0.59375,
258
+ "rewards/chosen": -0.22457607090473175,
259
+ "rewards/margins": 0.1670912355184555,
260
+ "rewards/rejected": -0.39166730642318726,
261
  "step": 180
262
  },
263
  {
264
+ "epoch": 0.53,
265
+ "learning_rate": 5.270694542927088e-07,
266
+ "logits/chosen": -0.17674800753593445,
267
+ "logits/rejected": -0.13648080825805664,
268
+ "logps/chosen": -336.358642578125,
269
+ "logps/rejected": -226.1509246826172,
270
+ "loss": 0.4263,
271
+ "rewards/accuracies": 0.65625,
272
+ "rewards/chosen": -0.160172238945961,
273
+ "rewards/margins": 0.2827780246734619,
274
+ "rewards/rejected": -0.4429502487182617,
275
  "step": 190
276
  },
277
  {
278
+ "epoch": 0.56,
279
+ "learning_rate": 4.778486836848107e-07,
280
+ "logits/chosen": -0.21899878978729248,
281
+ "logits/rejected": -0.13763020932674408,
282
+ "logps/chosen": -316.164794921875,
283
+ "logps/rejected": -248.52218627929688,
284
+ "loss": 0.4349,
285
+ "rewards/accuracies": 0.581250011920929,
286
+ "rewards/chosen": -0.2667180895805359,
287
+ "rewards/margins": 0.17106209695339203,
288
+ "rewards/rejected": -0.43778014183044434,
289
  "step": 200
290
  },
291
  {
292
+ "epoch": 0.59,
293
+ "learning_rate": 4.2884258086335745e-07,
294
+ "logits/chosen": -0.10616914182901382,
295
+ "logits/rejected": -0.06204689294099808,
296
+ "logps/chosen": -380.58349609375,
297
+ "logps/rejected": -267.28692626953125,
298
+ "loss": 0.427,
299
+ "rewards/accuracies": 0.5874999761581421,
300
+ "rewards/chosen": -0.19200220704078674,
301
+ "rewards/margins": 0.2422538697719574,
302
+ "rewards/rejected": -0.43425607681274414,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  "step": 210
304
  },
305
  {
306
+ "epoch": 0.62,
307
+ "learning_rate": 3.8052606259922095e-07,
308
+ "logits/chosen": -0.2504015564918518,
309
+ "logits/rejected": -0.1911773979663849,
310
+ "logps/chosen": -355.21234130859375,
311
+ "logps/rejected": -239.8585968017578,
312
+ "loss": 0.432,
313
+ "rewards/accuracies": 0.6187499761581421,
314
+ "rewards/chosen": -0.2169329822063446,
315
+ "rewards/margins": 0.1906813234090805,
316
+ "rewards/rejected": -0.4076143205165863,
317
  "step": 220
318
  },
319
  {
320
+ "epoch": 0.65,
321
+ "learning_rate": 3.333673629186279e-07,
322
+ "logits/chosen": -0.1560947299003601,
323
+ "logits/rejected": -0.10800876468420029,
324
+ "logps/chosen": -324.886962890625,
325
+ "logps/rejected": -239.11428833007812,
326
+ "loss": 0.4215,
327
+ "rewards/accuracies": 0.6187499761581421,
328
+ "rewards/chosen": -0.20993120968341827,
329
+ "rewards/margins": 0.2062130719423294,
330
+ "rewards/rejected": -0.4161442816257477,
331
  "step": 230
332
  },
333
  {
334
+ "epoch": 0.67,
335
+ "learning_rate": 2.878234954603167e-07,
336
+ "logits/chosen": -0.18873067200183868,
337
+ "logits/rejected": -0.10890357196331024,
338
+ "logps/chosen": -367.36383056640625,
339
+ "logps/rejected": -254.75003051757812,
340
+ "loss": 0.4096,
341
+ "rewards/accuracies": 0.6312500238418579,
342
+ "rewards/chosen": -0.1997281014919281,
343
+ "rewards/margins": 0.2125912606716156,
344
+ "rewards/rejected": -0.4123193621635437,
345
  "step": 240
346
  },
347
  {
348
+ "epoch": 0.7,
349
+ "learning_rate": 2.443358245691555e-07,
350
+ "logits/chosen": -0.20429889857769012,
351
+ "logits/rejected": -0.15454210340976715,
352
+ "logps/chosen": -369.3761291503906,
353
+ "logps/rejected": -248.98831176757812,
354
+ "loss": 0.4184,
355
+ "rewards/accuracies": 0.706250011920929,
356
+ "rewards/chosen": -0.16036532819271088,
357
+ "rewards/margins": 0.31407758593559265,
358
+ "rewards/rejected": -0.4744429588317871,
359
  "step": 250
360
  },
361
  {
362
+ "epoch": 0.73,
363
+ "learning_rate": 2.0332578804662782e-07,
364
+ "logits/chosen": -0.21500757336616516,
365
+ "logits/rejected": -0.16170722246170044,
366
+ "logps/chosen": -355.4232482910156,
367
+ "logps/rejected": -257.4869689941406,
368
+ "loss": 0.4354,
369
+ "rewards/accuracies": 0.65625,
370
+ "rewards/chosen": -0.22508692741394043,
371
+ "rewards/margins": 0.26084834337234497,
372
+ "rewards/rejected": -0.4859352707862854,
373
  "step": 260
374
  },
375
  {
376
+ "epoch": 0.76,
377
+ "learning_rate": 1.651908130088947e-07,
378
+ "logits/chosen": -0.1587514579296112,
379
+ "logits/rejected": -0.14712968468666077,
380
+ "logps/chosen": -329.80462646484375,
381
+ "logps/rejected": -258.6466369628906,
382
+ "loss": 0.4171,
383
+ "rewards/accuracies": 0.6312500238418579,
384
+ "rewards/chosen": -0.31652820110321045,
385
+ "rewards/margins": 0.23292319476604462,
386
+ "rewards/rejected": -0.5494514107704163,
387
  "step": 270
388
  },
389
  {
390
+ "epoch": 0.79,
391
+ "learning_rate": 1.3030046443173442e-07,
392
+ "logits/chosen": -0.14641807973384857,
393
+ "logits/rejected": -0.10507597029209137,
394
+ "logps/chosen": -379.27020263671875,
395
+ "logps/rejected": -258.7084655761719,
396
+ "loss": 0.4366,
397
+ "rewards/accuracies": 0.65625,
398
+ "rewards/chosen": -0.2606154978275299,
399
+ "rewards/margins": 0.2980353832244873,
400
+ "rewards/rejected": -0.5586508512496948,
401
  "step": 280
402
  },
403
  {
404
+ "epoch": 0.81,
405
+ "learning_rate": 9.899286370670574e-08,
406
+ "logits/chosen": -0.1658031940460205,
407
+ "logits/rejected": -0.0985722690820694,
408
+ "logps/chosen": -343.7666320800781,
409
+ "logps/rejected": -271.5211181640625,
410
+ "loss": 0.4418,
411
+ "rewards/accuracies": 0.550000011920929,
412
+ "rewards/chosen": -0.3174339830875397,
413
+ "rewards/margins": 0.17954358458518982,
414
+ "rewards/rejected": -0.49697762727737427,
415
  "step": 290
416
  },
417
  {
418
+ "epoch": 0.84,
419
+ "learning_rate": 7.157141191620548e-08,
420
+ "logits/chosen": -0.2034151554107666,
421
+ "logits/rejected": -0.10025110095739365,
422
+ "logps/chosen": -365.5801086425781,
423
+ "logps/rejected": -251.5866241455078,
424
+ "loss": 0.4302,
425
+ "rewards/accuracies": 0.606249988079071,
426
+ "rewards/chosen": -0.19396531581878662,
427
+ "rewards/margins": 0.27736470103263855,
428
+ "rewards/rejected": -0.47133007645606995,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  "step": 300
430
  },
431
  {
432
+ "epoch": 0.87,
433
+ "learning_rate": 4.830184958207006e-08,
434
+ "logits/chosen": -0.20084922015666962,
435
+ "logits/rejected": -0.11563346534967422,
436
+ "logps/chosen": -336.1097106933594,
437
+ "logps/rejected": -272.353515625,
438
+ "loss": 0.4291,
439
+ "rewards/accuracies": 0.6187499761581421,
440
+ "rewards/chosen": -0.24448652565479279,
441
+ "rewards/margins": 0.242269366979599,
442
+ "rewards/rejected": -0.48675593733787537,
443
  "step": 310
444
  },
445
  {
446
+ "epoch": 0.9,
447
+ "learning_rate": 2.940968138161731e-08,
448
+ "logits/chosen": -0.13686858117580414,
449
+ "logits/rejected": -0.11668189615011215,
450
+ "logps/chosen": -317.7008361816406,
451
+ "logps/rejected": -261.1730041503906,
452
+ "loss": 0.4301,
453
+ "rewards/accuracies": 0.5687500238418579,
454
+ "rewards/chosen": -0.2534436285495758,
455
+ "rewards/margins": 0.16237813234329224,
456
+ "rewards/rejected": -0.41582173109054565,
457
  "step": 320
458
  },
459
  {
460
+ "epoch": 0.93,
461
+ "learning_rate": 1.507799078812799e-08,
462
+ "logits/chosen": -0.23092389106750488,
463
+ "logits/rejected": -0.16825535893440247,
464
+ "logps/chosen": -412.626708984375,
465
+ "logps/rejected": -301.6609802246094,
466
+ "loss": 0.4204,
467
+ "rewards/accuracies": 0.625,
468
+ "rewards/chosen": -0.27390944957733154,
469
+ "rewards/margins": 0.2617945373058319,
470
+ "rewards/rejected": -0.5357040166854858,
471
  "step": 330
472
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  {
474
  "epoch": 0.96,
475
+ "learning_rate": 5.445665814031941e-09,
476
+ "logits/chosen": -0.1695241779088974,
477
+ "logits/rejected": -0.12149496376514435,
478
+ "logps/chosen": -358.09063720703125,
479
+ "logps/rejected": -265.50189208984375,
480
+ "loss": 0.4321,
481
+ "rewards/accuracies": 0.637499988079071,
482
+ "rewards/chosen": -0.2663304805755615,
483
+ "rewards/margins": 0.24073641002178192,
484
+ "rewards/rejected": -0.5070669054985046,
485
+ "step": 340
486
  },
487
  {
488
  "epoch": 0.98,
489
+ "learning_rate": 6.060530510659245e-10,
490
+ "logits/chosen": -0.2175191193819046,
491
+ "logits/rejected": -0.21960613131523132,
492
+ "logps/chosen": -345.24298095703125,
493
+ "logps/rejected": -263.9671630859375,
494
+ "loss": 0.4306,
495
+ "rewards/accuracies": 0.6499999761581421,
496
+ "rewards/chosen": -0.2784077525138855,
497
+ "rewards/margins": 0.24674105644226074,
498
+ "rewards/rejected": -0.5251488089561462,
499
+ "step": 350
500
  },
501
  {
502
  "epoch": 1.0,
503
+ "step": 355,
504
  "total_flos": 0.0,
505
+ "train_loss": 0.464236611379704,
506
+ "train_runtime": 5271.2295,
507
+ "train_samples_per_second": 8.641,
508
+ "train_steps_per_second": 0.067
509
  }
510
  ],
511
  "logging_steps": 10,
512
+ "max_steps": 355,
513
  "num_train_epochs": 1,
514
+ "save_steps": 10000,
515
  "total_flos": 0.0,
516
  "trial_name": null,
517
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a91613e24ef836988f352fb159b97a4baf24844c507e3a389c701dcc985b914
3
  size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:132930e5c6429d808850916e7587770d7157ebba376ed0e45170c2bd96c2061b
3
  size 6648