RikkiXu committed 815db06 (1 parent: 5498bfa)

Model save

README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 
   # zephyr-7b-dpo-full
 
- This model was trained from scratch on the None dataset.
+ This model was trained from scratch on an unknown dataset.
 
   ## Model description
 
@@ -54,5 +54,5 @@ The following hyperparameters were used during training:
 
   - Transformers 4.39.3
   - Pytorch 2.1.2+cu118
- - Datasets 2.16.1
+ - Datasets 2.19.1
   - Tokenizers 0.15.2
all_results.json CHANGED
@@ -1,8 +1,8 @@
  {
      "epoch": 1.0,
-     "train_loss": 0.6571147800988233,
-     "train_runtime": 7042.8127,
-     "train_samples": 61134,
-     "train_samples_per_second": 8.68,
-     "train_steps_per_second": 0.034
+     "train_loss": 1.2810401298381664,
+     "train_runtime": 5417.1403,
+     "train_samples": 48530,
+     "train_samples_per_second": 8.959,
+     "train_steps_per_second": 0.035
  }
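As a sanity check (an editor's note, not part of the commit), the updated throughput fields are consistent with each other: train_samples / train_runtime and global steps / train_runtime reproduce the reported rates. A minimal illustrative snippet, using the values above and the step count from trainer_state.json further down:

```python
# Illustrative only: the reported throughput follows from the other fields.
train_samples = 48530
train_runtime = 5417.1403   # seconds
global_steps = 189          # "global_step" in trainer_state.json below

print(round(train_samples / train_runtime, 3))  # ~8.959 -> "train_samples_per_second"
print(round(global_steps / train_runtime, 3))   # ~0.035 -> "train_steps_per_second"
```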
config.json CHANGED
@@ -1,5 +1,5 @@
  {
-     "_name_or_path": "/mnt/bn/xuruijie-llm/checkpoints/hh-rlhf/sft_0521/checkpoint-5500/",
+     "_name_or_path": "/mnt/bn/xuruijie-llm/checkpoints/new_world/v1-ultral",
      "architectures": [
          "MistralForCausalLM"
      ],
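For reference, config.json declares a MistralForCausalLM architecture, so the checkpoint loads through the standard Transformers auto classes. A minimal sketch, assuming the repo id pieced together from the username and model name on this page (an assumption, not stated in the commit):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "RikkiXu/zephyr-7b-dpo-full"  # assumed repo id, not given explicitly in this diff

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# Resolves to MistralForCausalLM per the "architectures" field in config.json.
model = AutoModelForCausalLM.from_pretrained(repo_id)
```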
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dddc5c965ccc5628ea39a77853c3fb62741e797052df590decda94e2abd07848
+ oid sha256:b008443197fefaac049fe7a0eabc01b0151b73564d92afc7767c1fa8c71e1ca8
  size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a54933b23f81e03fa674e855dbe4cff4d2b2db853148b612b2e9bda79732eef1
+ oid sha256:9c3ff196fe6ec2e2cfd1db6b1aeb3c9df6fa0eb8b2d7025d4ac80bcd6c50ae14
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8eec202fc7af03b241019dcbe2b7d9864139f64498f3bed67407f5bca3a50ac2
+ oid sha256:026ec669932b9619a7d776b17104d80d5e127fd258a8759a65b68cadf6158a75
  size 4540532728
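The .safetensors entries above are Git LFS pointer files (version, oid, size), not the weights themselves. A minimal sketch of verifying a locally downloaded shard against the new pointer (the local path is hypothetical):

```python
import hashlib

# Hypothetical local path; compare against the "oid sha256:" field in the pointer above.
path = "model-00001-of-00003.safetensors"
expected = "b008443197fefaac049fe7a0eabc01b0151b73564d92afc7767c1fa8c71e1ca8"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(h.hexdigest() == expected)  # True if the shard matches the LFS pointer
```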
runs/Jun14_02-10-31_n136-112-146/events.out.tfevents.1718302665.n136-112-146.3899146.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:98047bd374c280721085f729df31ccd128a61a18ee74e0c328ba033427961d5c
- size 12310
+ oid sha256:4a5e7a536edc8e9aacc4b85472396013cb5053fa96f34d95849b2c2113aa398b
+ size 18144
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
      "epoch": 1.0,
-     "train_loss": 0.6571147800988233,
-     "train_runtime": 7042.8127,
-     "train_samples": 61134,
-     "train_samples_per_second": 8.68,
-     "train_steps_per_second": 0.034
+     "train_loss": 1.2810401298381664,
+     "train_runtime": 5417.1403,
+     "train_samples": 48530,
+     "train_samples_per_second": 8.959,
+     "train_steps_per_second": 0.035
  }
trainer_state.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 239,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "grad_norm": 571.8916022631114,
14
- "learning_rate": 2.083333333333333e-08,
15
- "logits/chosen": -2.048940896987915,
16
- "logits/rejected": -2.0532867908477783,
17
- "logps/chosen": -384.4364318847656,
18
- "logps/rejected": -422.8185729980469,
19
- "loss": 1.3635,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
@@ -24,362 +24,287 @@
24
  "step": 1
25
  },
26
  {
27
- "epoch": 0.04,
28
- "grad_norm": 402.57642784617445,
29
- "learning_rate": 2.0833333333333333e-07,
30
- "logits/chosen": -2.09702730178833,
31
- "logits/rejected": -2.076167345046997,
32
- "logps/chosen": -348.87298583984375,
33
- "logps/rejected": -353.1369934082031,
34
- "loss": 1.2239,
35
- "rewards/accuracies": 0.4409722089767456,
36
- "rewards/chosen": 0.08129607886075974,
37
- "rewards/margins": 0.002124728402122855,
38
- "rewards/rejected": 0.07917135208845139,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.08,
43
- "grad_norm": 222.2200617264089,
44
- "learning_rate": 4.1666666666666667e-07,
45
- "logits/chosen": -2.1397833824157715,
46
- "logits/rejected": -2.131530523300171,
47
- "logps/chosen": -350.229736328125,
48
- "logps/rejected": -320.4481506347656,
49
- "loss": 0.941,
50
- "rewards/accuracies": 0.6000000238418579,
51
- "rewards/chosen": 1.970922827720642,
52
- "rewards/margins": 0.47100192308425903,
53
- "rewards/rejected": 1.4999210834503174,
54
  "step": 20
55
  },
56
  {
57
- "epoch": 0.13,
58
- "grad_norm": 214.0593003301825,
59
- "learning_rate": 4.990398100856366e-07,
60
- "logits/chosen": -2.2208588123321533,
61
- "logits/rejected": -2.1912357807159424,
62
- "logps/chosen": -312.9959411621094,
63
- "logps/rejected": -347.01055908203125,
64
- "loss": 0.8736,
65
- "rewards/accuracies": 0.5375000238418579,
66
- "rewards/chosen": 4.563899040222168,
67
- "rewards/margins": 0.5541414618492126,
68
- "rewards/rejected": 4.009757041931152,
69
  "step": 30
70
  },
71
  {
72
- "epoch": 0.17,
73
- "grad_norm": 153.2749263573273,
74
- "learning_rate": 4.931986719649298e-07,
75
- "logits/chosen": -2.336960554122925,
76
- "logits/rejected": -2.318669080734253,
77
- "logps/chosen": -340.0647277832031,
78
- "logps/rejected": -310.8158874511719,
79
- "loss": 0.7797,
80
- "rewards/accuracies": 0.637499988079071,
81
- "rewards/chosen": 4.682702541351318,
82
- "rewards/margins": 1.4643404483795166,
83
- "rewards/rejected": 3.218362331390381,
84
  "step": 40
85
  },
86
  {
87
- "epoch": 0.21,
88
- "grad_norm": 190.97179299432227,
89
- "learning_rate": 4.821741763807186e-07,
90
- "logits/chosen": -2.2911651134490967,
91
- "logits/rejected": -2.2763919830322266,
92
- "logps/chosen": -314.5186462402344,
93
- "logps/rejected": -321.0877380371094,
94
- "loss": 0.7046,
95
- "rewards/accuracies": 0.6625000238418579,
96
- "rewards/chosen": 2.596972942352295,
97
- "rewards/margins": 1.727838158607483,
98
- "rewards/rejected": 0.8691347241401672,
99
  "step": 50
100
  },
101
  {
102
- "epoch": 0.25,
103
- "grad_norm": 164.07905629846462,
104
- "learning_rate": 4.662012913161997e-07,
105
- "logits/chosen": -2.275245428085327,
106
- "logits/rejected": -2.253256320953369,
107
- "logps/chosen": -356.04034423828125,
108
- "logps/rejected": -334.3038635253906,
109
- "loss": 0.6957,
110
- "rewards/accuracies": 0.7093750238418579,
111
- "rewards/chosen": 3.08062481880188,
112
- "rewards/margins": 1.801513671875,
113
- "rewards/rejected": 1.2791111469268799,
114
  "step": 60
115
  },
116
  {
117
- "epoch": 0.29,
118
- "grad_norm": 182.02883713121474,
119
- "learning_rate": 4.456204510851956e-07,
120
- "logits/chosen": -2.232480525970459,
121
- "logits/rejected": -2.213390350341797,
122
- "logps/chosen": -354.5909118652344,
123
- "logps/rejected": -359.82049560546875,
124
- "loss": 0.6379,
125
- "rewards/accuracies": 0.6625000238418579,
126
- "rewards/chosen": 2.7843456268310547,
127
- "rewards/margins": 1.498595952987671,
128
- "rewards/rejected": 1.2857494354248047,
129
  "step": 70
130
  },
131
  {
132
- "epoch": 0.33,
133
- "grad_norm": 185.93895599972302,
134
- "learning_rate": 4.2087030056579986e-07,
135
- "logits/chosen": -2.2146525382995605,
136
- "logits/rejected": -2.190126895904541,
137
- "logps/chosen": -339.37750244140625,
138
- "logps/rejected": -335.23565673828125,
139
- "loss": 0.6529,
140
- "rewards/accuracies": 0.675000011920929,
141
- "rewards/chosen": 2.3711397647857666,
142
- "rewards/margins": 1.5012987852096558,
143
- "rewards/rejected": 0.8698409199714661,
144
  "step": 80
145
  },
146
  {
147
- "epoch": 0.38,
148
- "grad_norm": 190.95476688008705,
149
- "learning_rate": 3.9247834624635404e-07,
150
- "logits/chosen": -2.1572606563568115,
151
- "logits/rejected": -2.1407630443573,
152
- "logps/chosen": -313.8312072753906,
153
- "logps/rejected": -300.87176513671875,
154
- "loss": 0.6248,
155
- "rewards/accuracies": 0.653124988079071,
156
- "rewards/chosen": 1.4995744228363037,
157
- "rewards/margins": 1.1674902439117432,
158
- "rewards/rejected": 0.3320842981338501,
159
  "step": 90
160
  },
161
  {
162
- "epoch": 0.42,
163
- "grad_norm": 195.00852100658702,
164
- "learning_rate": 3.610497133404795e-07,
165
- "logits/chosen": -2.1591010093688965,
166
- "logits/rejected": -2.1587300300598145,
167
- "logps/chosen": -335.4873962402344,
168
- "logps/rejected": -334.4920654296875,
169
- "loss": 0.6092,
170
- "rewards/accuracies": 0.671875,
171
- "rewards/chosen": 1.2783663272857666,
172
- "rewards/margins": 1.1912460327148438,
173
- "rewards/rejected": 0.08712034672498703,
174
  "step": 100
175
  },
176
  {
177
- "epoch": 0.46,
178
- "grad_norm": 192.15199269769496,
179
- "learning_rate": 3.272542485937368e-07,
180
- "logits/chosen": -2.1222145557403564,
181
- "logits/rejected": -2.108656644821167,
182
- "logps/chosen": -356.91278076171875,
183
- "logps/rejected": -335.1573791503906,
184
- "loss": 0.6316,
185
- "rewards/accuracies": 0.671875,
186
- "rewards/chosen": 0.5509830713272095,
187
- "rewards/margins": 1.5539244413375854,
188
- "rewards/rejected": -1.002941370010376,
189
  "step": 110
190
  },
191
  {
192
- "epoch": 0.5,
193
- "grad_norm": 199.26323132750235,
194
- "learning_rate": 2.9181224366319943e-07,
195
- "logits/chosen": -2.1514651775360107,
196
- "logits/rejected": -2.13875150680542,
197
- "logps/chosen": -335.4028015136719,
198
- "logps/rejected": -338.54803466796875,
199
- "loss": 0.5827,
200
- "rewards/accuracies": 0.6968749761581421,
201
- "rewards/chosen": 0.3591001331806183,
202
- "rewards/margins": 1.421007752418518,
203
- "rewards/rejected": -1.0619075298309326,
204
  "step": 120
205
  },
206
  {
207
- "epoch": 0.54,
208
- "grad_norm": 183.666249275795,
209
- "learning_rate": 2.55479083351317e-07,
210
- "logits/chosen": -2.1675009727478027,
211
- "logits/rejected": -2.150801420211792,
212
- "logps/chosen": -380.80010986328125,
213
- "logps/rejected": -352.38287353515625,
214
- "loss": 0.5791,
215
- "rewards/accuracies": 0.690625011920929,
216
- "rewards/chosen": 0.7985715866088867,
217
- "rewards/margins": 1.560523509979248,
218
- "rewards/rejected": -0.7619519829750061,
219
  "step": 130
220
  },
221
  {
222
- "epoch": 0.59,
223
- "grad_norm": 215.65073240301075,
224
- "learning_rate": 2.19029145890313e-07,
225
- "logits/chosen": -2.1623148918151855,
226
- "logits/rejected": -2.1485044956207275,
227
- "logps/chosen": -354.16107177734375,
228
- "logps/rejected": -346.3798828125,
229
- "loss": 0.5906,
230
- "rewards/accuracies": 0.6875,
231
- "rewards/chosen": 0.23650877177715302,
232
- "rewards/margins": 1.3487697839736938,
233
- "rewards/rejected": -1.1122608184814453,
234
  "step": 140
235
  },
236
  {
237
- "epoch": 0.63,
238
- "grad_norm": 187.86987212255403,
239
- "learning_rate": 1.8323929841460178e-07,
240
- "logits/chosen": -2.14518666267395,
241
- "logits/rejected": -2.1396467685699463,
242
- "logps/chosen": -357.54351806640625,
243
- "logps/rejected": -344.2846984863281,
244
- "loss": 0.5634,
245
- "rewards/accuracies": 0.690625011920929,
246
- "rewards/chosen": 0.020458679646253586,
247
- "rewards/margins": 1.4657080173492432,
248
- "rewards/rejected": -1.4452494382858276,
249
  "step": 150
250
  },
251
  {
252
- "epoch": 0.67,
253
- "grad_norm": 201.55973529208768,
254
- "learning_rate": 1.488723393865766e-07,
255
- "logits/chosen": -2.127716541290283,
256
- "logits/rejected": -2.107163667678833,
257
- "logps/chosen": -385.68658447265625,
258
- "logps/rejected": -347.5813903808594,
259
- "loss": 0.5458,
260
- "rewards/accuracies": 0.6625000238418579,
261
- "rewards/chosen": -0.02848060429096222,
262
- "rewards/margins": 1.421844244003296,
263
- "rewards/rejected": -1.4503247737884521,
264
  "step": 160
265
  },
266
  {
267
- "epoch": 0.71,
268
- "grad_norm": 220.5429072103582,
269
- "learning_rate": 1.1666074087171627e-07,
270
- "logits/chosen": -2.1673145294189453,
271
- "logits/rejected": -2.1441078186035156,
272
- "logps/chosen": -364.5622253417969,
273
- "logps/rejected": -367.15631103515625,
274
- "loss": 0.5613,
275
- "rewards/accuracies": 0.628125011920929,
276
- "rewards/chosen": -0.00023489892191719264,
277
- "rewards/margins": 1.2582697868347168,
278
- "rewards/rejected": -1.2585046291351318,
279
  "step": 170
280
  },
281
  {
282
- "epoch": 0.75,
283
- "grad_norm": 205.8312467056188,
284
- "learning_rate": 8.729103716819111e-08,
285
- "logits/chosen": -2.1819815635681152,
286
- "logits/rejected": -2.148003101348877,
287
- "logps/chosen": -365.36895751953125,
288
- "logps/rejected": -343.6730041503906,
289
- "loss": 0.5619,
290
- "rewards/accuracies": 0.7281249761581421,
291
- "rewards/chosen": 0.03583994507789612,
292
- "rewards/margins": 1.5244814157485962,
293
- "rewards/rejected": -1.4886412620544434,
294
  "step": 180
295
  },
296
- {
297
- "epoch": 0.79,
298
- "grad_norm": 248.5874406421173,
299
- "learning_rate": 6.138919252022435e-08,
300
- "logits/chosen": -2.1517252922058105,
301
- "logits/rejected": -2.140963315963745,
302
- "logps/chosen": -340.9563903808594,
303
- "logps/rejected": -343.2937927246094,
304
- "loss": 0.5592,
305
- "rewards/accuracies": 0.640625,
306
- "rewards/chosen": -0.48524799942970276,
307
- "rewards/margins": 1.1504557132720947,
308
- "rewards/rejected": -1.635703682899475,
309
- "step": 190
310
- },
311
- {
312
- "epoch": 0.84,
313
- "grad_norm": 195.75027491525935,
314
- "learning_rate": 3.9507259776993954e-08,
315
- "logits/chosen": -2.1510136127471924,
316
- "logits/rejected": -2.1407511234283447,
317
- "logps/chosen": -375.94866943359375,
318
- "logps/rejected": -375.7215576171875,
319
- "loss": 0.5553,
320
- "rewards/accuracies": 0.6812499761581421,
321
- "rewards/chosen": -0.11154387891292572,
322
- "rewards/margins": 1.316066861152649,
323
- "rewards/rejected": -1.4276106357574463,
324
- "step": 200
325
- },
326
- {
327
- "epoch": 0.88,
328
- "grad_norm": 193.59938752041137,
329
- "learning_rate": 2.2111614344599684e-08,
330
- "logits/chosen": -2.1561176776885986,
331
- "logits/rejected": -2.128119468688965,
332
- "logps/chosen": -384.52301025390625,
333
- "logps/rejected": -371.85626220703125,
334
- "loss": 0.5496,
335
- "rewards/accuracies": 0.7124999761581421,
336
- "rewards/chosen": 0.12478647381067276,
337
- "rewards/margins": 1.6165813207626343,
338
- "rewards/rejected": -1.4917947053909302,
339
- "step": 210
340
- },
341
- {
342
- "epoch": 0.92,
343
- "grad_norm": 184.30698727614964,
344
- "learning_rate": 9.57301420397924e-09,
345
- "logits/chosen": -2.14475154876709,
346
- "logits/rejected": -2.0955374240875244,
347
- "logps/chosen": -371.14569091796875,
348
- "logps/rejected": -362.03131103515625,
349
- "loss": 0.5406,
350
- "rewards/accuracies": 0.668749988079071,
351
- "rewards/chosen": -0.15589404106140137,
352
- "rewards/margins": 1.2348332405090332,
353
- "rewards/rejected": -1.3907272815704346,
354
- "step": 220
355
- },
356
- {
357
- "epoch": 0.96,
358
- "grad_norm": 201.71570488034948,
359
- "learning_rate": 2.158697848236607e-09,
360
- "logits/chosen": -2.133373737335205,
361
- "logits/rejected": -2.1216492652893066,
362
- "logps/chosen": -373.65228271484375,
363
- "logps/rejected": -375.40582275390625,
364
- "loss": 0.5667,
365
- "rewards/accuracies": 0.6781250238418579,
366
- "rewards/chosen": -0.2453586757183075,
367
- "rewards/margins": 1.2606414556503296,
368
- "rewards/rejected": -1.5060001611709595,
369
- "step": 230
370
- },
371
  {
372
  "epoch": 1.0,
373
- "step": 239,
374
  "total_flos": 0.0,
375
- "train_loss": 0.6571147800988233,
376
- "train_runtime": 7042.8127,
377
- "train_samples_per_second": 8.68,
378
- "train_steps_per_second": 0.034
379
  }
380
  ],
381
  "logging_steps": 10,
382
- "max_steps": 239,
383
  "num_input_tokens_seen": 0,
384
  "num_train_epochs": 1,
385
  "save_steps": 100,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9960474308300395,
5
  "eval_steps": 500,
6
+ "global_step": 189,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01,
13
+ "grad_norm": 4737.779382861946,
14
+ "learning_rate": 2.6315789473684208e-08,
15
+ "logits/chosen": -4.638427734375,
16
+ "logits/rejected": -4.891327857971191,
17
+ "logps/chosen": -198.52749633789062,
18
+ "logps/rejected": -147.3392791748047,
19
+ "loss": 2.1269,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
 
24
  "step": 1
25
  },
26
  {
27
+ "epoch": 0.05,
28
+ "grad_norm": 3019.619079402316,
29
+ "learning_rate": 2.631578947368421e-07,
30
+ "logits/chosen": -4.496801376342773,
31
+ "logits/rejected": -4.816222190856934,
32
+ "logps/chosen": -224.27357482910156,
33
+ "logps/rejected": -168.04739379882812,
34
+ "loss": 1.9212,
35
+ "rewards/accuracies": 0.5381944179534912,
36
+ "rewards/chosen": 0.3541475236415863,
37
+ "rewards/margins": 0.37169286608695984,
38
+ "rewards/rejected": -0.017545383423566818,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.11,
43
+ "grad_norm": 1863.4195630562826,
44
+ "learning_rate": 4.999573126145131e-07,
45
+ "logits/chosen": -4.533459663391113,
46
+ "logits/rejected": -4.848563194274902,
47
+ "logps/chosen": -220.4309539794922,
48
+ "logps/rejected": -180.72413635253906,
49
+ "loss": 1.1783,
50
+ "rewards/accuracies": 0.8343750238418579,
51
+ "rewards/chosen": 5.777209281921387,
52
+ "rewards/margins": 6.331713676452637,
53
+ "rewards/rejected": -0.5545047521591187,
54
  "step": 20
55
  },
56
  {
57
+ "epoch": 0.16,
58
+ "grad_norm": 1882.3555396757283,
59
+ "learning_rate": 4.948524419003415e-07,
60
+ "logits/chosen": -4.54370641708374,
61
+ "logits/rejected": -4.812285423278809,
62
+ "logps/chosen": -213.49411010742188,
63
+ "logps/rejected": -177.16848754882812,
64
+ "loss": 1.3518,
65
+ "rewards/accuracies": 0.8531249761581421,
66
+ "rewards/chosen": 8.2464017868042,
67
+ "rewards/margins": 12.621076583862305,
68
+ "rewards/rejected": -4.3746747970581055,
69
  "step": 30
70
  },
71
  {
72
+ "epoch": 0.21,
73
+ "grad_norm": 1979.651061288252,
74
+ "learning_rate": 4.81409414945389e-07,
75
+ "logits/chosen": -4.619187831878662,
76
+ "logits/rejected": -4.8958845138549805,
77
+ "logps/chosen": -221.00082397460938,
78
+ "logps/rejected": -184.62203979492188,
79
+ "loss": 1.4689,
80
+ "rewards/accuracies": 0.8343750238418579,
81
+ "rewards/chosen": 7.427975654602051,
82
+ "rewards/margins": 15.529205322265625,
83
+ "rewards/rejected": -8.101228713989258,
84
  "step": 40
85
  },
86
  {
87
+ "epoch": 0.26,
88
+ "grad_norm": 2122.235483955093,
89
+ "learning_rate": 4.6008601790947314e-07,
90
+ "logits/chosen": -4.608691215515137,
91
+ "logits/rejected": -4.925226211547852,
92
+ "logps/chosen": -210.32058715820312,
93
+ "logps/rejected": -179.0367431640625,
94
+ "loss": 1.3821,
95
+ "rewards/accuracies": 0.859375,
96
+ "rewards/chosen": 6.428221225738525,
97
+ "rewards/margins": 16.42898941040039,
98
+ "rewards/rejected": -10.00076961517334,
99
  "step": 50
100
  },
101
  {
102
+ "epoch": 0.32,
103
+ "grad_norm": 1692.2277360562514,
104
+ "learning_rate": 4.3160839350405605e-07,
105
+ "logits/chosen": -4.665585994720459,
106
+ "logits/rejected": -4.9272074699401855,
107
+ "logps/chosen": -205.7926788330078,
108
+ "logps/rejected": -178.56011962890625,
109
+ "loss": 1.3465,
110
+ "rewards/accuracies": 0.859375,
111
+ "rewards/chosen": 7.834652900695801,
112
+ "rewards/margins": 16.5399169921875,
113
+ "rewards/rejected": -8.705263137817383,
114
  "step": 60
115
  },
116
  {
117
+ "epoch": 0.37,
118
+ "grad_norm": 1810.8868167381333,
119
+ "learning_rate": 3.9694631307311825e-07,
120
+ "logits/chosen": -4.6464009284973145,
121
+ "logits/rejected": -4.913968086242676,
122
+ "logps/chosen": -207.1618194580078,
123
+ "logps/rejected": -182.61012268066406,
124
+ "loss": 1.3564,
125
+ "rewards/accuracies": 0.8656250238418579,
126
+ "rewards/chosen": 8.192334175109863,
127
+ "rewards/margins": 16.401655197143555,
128
+ "rewards/rejected": -8.209321975708008,
129
  "step": 70
130
  },
131
  {
132
+ "epoch": 0.42,
133
+ "grad_norm": 1395.6821844604426,
134
+ "learning_rate": 3.572801521931522e-07,
135
+ "logits/chosen": -4.674800395965576,
136
+ "logits/rejected": -4.932587623596191,
137
+ "logps/chosen": -202.7789764404297,
138
+ "logps/rejected": -184.74395751953125,
139
+ "loss": 1.312,
140
+ "rewards/accuracies": 0.846875011920929,
141
+ "rewards/chosen": 5.3585076332092285,
142
+ "rewards/margins": 16.119762420654297,
143
+ "rewards/rejected": -10.761255264282227,
144
  "step": 80
145
  },
146
  {
147
+ "epoch": 0.47,
148
+ "grad_norm": 1761.719146022038,
149
+ "learning_rate": 3.139606943986089e-07,
150
+ "logits/chosen": -4.721759796142578,
151
+ "logits/rejected": -4.953747272491455,
152
+ "logps/chosen": -199.81448364257812,
153
+ "logps/rejected": -178.44004821777344,
154
+ "loss": 1.3425,
155
+ "rewards/accuracies": 0.815625011920929,
156
+ "rewards/chosen": 7.849789619445801,
157
+ "rewards/margins": 15.739909172058105,
158
+ "rewards/rejected": -7.890120029449463,
159
  "step": 90
160
  },
161
  {
162
+ "epoch": 0.53,
163
+ "grad_norm": 1641.4466240114464,
164
+ "learning_rate": 2.684631318687185e-07,
165
+ "logits/chosen": -4.7313385009765625,
166
+ "logits/rejected": -4.984685897827148,
167
+ "logps/chosen": -213.2564239501953,
168
+ "logps/rejected": -190.69088745117188,
169
+ "loss": 1.3623,
170
+ "rewards/accuracies": 0.862500011920929,
171
+ "rewards/chosen": 5.4120659828186035,
172
+ "rewards/margins": 16.620161056518555,
173
+ "rewards/rejected": -11.208093643188477,
174
  "step": 100
175
  },
176
  {
177
+ "epoch": 0.58,
178
+ "grad_norm": 1346.9601711684072,
179
+ "learning_rate": 2.2233682952712483e-07,
180
+ "logits/chosen": -4.668034553527832,
181
+ "logits/rejected": -4.953825950622559,
182
+ "logps/chosen": -216.8499298095703,
183
+ "logps/rejected": -186.10470581054688,
184
+ "loss": 1.1234,
185
+ "rewards/accuracies": 0.856249988079071,
186
+ "rewards/chosen": 7.529428005218506,
187
+ "rewards/margins": 15.865753173828125,
188
+ "rewards/rejected": -8.336324691772461,
189
  "step": 110
190
  },
191
  {
192
+ "epoch": 0.63,
193
+ "grad_norm": 1999.3900490416042,
194
+ "learning_rate": 1.7715256327766884e-07,
195
+ "logits/chosen": -4.720789909362793,
196
+ "logits/rejected": -5.025943279266357,
197
+ "logps/chosen": -207.7978973388672,
198
+ "logps/rejected": -178.0445098876953,
199
+ "loss": 1.1185,
200
+ "rewards/accuracies": 0.859375,
201
+ "rewards/chosen": 8.364091873168945,
202
+ "rewards/margins": 16.011329650878906,
203
+ "rewards/rejected": -7.647237300872803,
204
  "step": 120
205
  },
206
  {
207
+ "epoch": 0.69,
208
+ "grad_norm": 1568.9083661238265,
209
+ "learning_rate": 1.3444902911492174e-07,
210
+ "logits/chosen": -4.702408790588379,
211
+ "logits/rejected": -4.98063325881958,
212
+ "logps/chosen": -215.88174438476562,
213
+ "logps/rejected": -188.39645385742188,
214
+ "loss": 1.2748,
215
+ "rewards/accuracies": 0.8531249761581421,
216
+ "rewards/chosen": 7.107934474945068,
217
+ "rewards/margins": 16.361108779907227,
218
+ "rewards/rejected": -9.253173828125,
219
  "step": 130
220
  },
221
  {
222
+ "epoch": 0.74,
223
+ "grad_norm": 1390.666498149427,
224
+ "learning_rate": 9.56804446775518e-08,
225
+ "logits/chosen": -4.6129560470581055,
226
+ "logits/rejected": -4.923257350921631,
227
+ "logps/chosen": -208.4700469970703,
228
+ "logps/rejected": -178.78623962402344,
229
+ "loss": 1.0987,
230
+ "rewards/accuracies": 0.859375,
231
+ "rewards/chosen": 6.485724449157715,
232
+ "rewards/margins": 17.193899154663086,
233
+ "rewards/rejected": -10.708174705505371,
234
  "step": 140
235
  },
236
  {
237
+ "epoch": 0.79,
238
+ "grad_norm": 1296.5821049110084,
239
+ "learning_rate": 6.216702761078166e-08,
240
+ "logits/chosen": -4.699868202209473,
241
+ "logits/rejected": -4.9864583015441895,
242
+ "logps/chosen": -196.650146484375,
243
+ "logps/rejected": -168.93551635742188,
244
+ "loss": 1.0856,
245
+ "rewards/accuracies": 0.8968750238418579,
246
+ "rewards/chosen": 7.758223056793213,
247
+ "rewards/margins": 17.15636444091797,
248
+ "rewards/rejected": -9.398139953613281,
249
  "step": 150
250
  },
251
  {
252
+ "epoch": 0.84,
253
+ "grad_norm": 1769.0071097352081,
254
+ "learning_rate": 3.5050037137906885e-08,
255
+ "logits/chosen": -4.634187698364258,
256
+ "logits/rejected": -4.958773612976074,
257
+ "logps/chosen": -211.03591918945312,
258
+ "logps/rejected": -176.72067260742188,
259
+ "loss": 1.1749,
260
+ "rewards/accuracies": 0.8531249761581421,
261
+ "rewards/chosen": 8.515033721923828,
262
+ "rewards/margins": 16.594724655151367,
263
+ "rewards/rejected": -8.079689979553223,
264
  "step": 160
265
  },
266
  {
267
+ "epoch": 0.9,
268
+ "grad_norm": 1606.699013433802,
269
+ "learning_rate": 1.5252909846235894e-08,
270
+ "logits/chosen": -4.62954044342041,
271
+ "logits/rejected": -4.913142204284668,
272
+ "logps/chosen": -209.8083953857422,
273
+ "logps/rejected": -184.52127075195312,
274
+ "loss": 1.2059,
275
+ "rewards/accuracies": 0.9125000238418579,
276
+ "rewards/chosen": 9.714839935302734,
277
+ "rewards/margins": 18.106443405151367,
278
+ "rewards/rejected": -8.391606330871582,
279
  "step": 170
280
  },
281
  {
282
+ "epoch": 0.95,
283
+ "grad_norm": 1242.5060745172418,
284
+ "learning_rate": 3.4498131616493565e-09,
285
+ "logits/chosen": -4.616083145141602,
286
+ "logits/rejected": -4.87780237197876,
287
+ "logps/chosen": -215.27685546875,
288
+ "logps/rejected": -193.55332946777344,
289
+ "loss": 1.1132,
290
+ "rewards/accuracies": 0.8125,
291
+ "rewards/chosen": 6.19677209854126,
292
+ "rewards/margins": 13.694157600402832,
293
+ "rewards/rejected": -7.497385501861572,
294
  "step": 180
295
  },
 
296
  {
297
  "epoch": 1.0,
298
+ "step": 189,
299
  "total_flos": 0.0,
300
+ "train_loss": 1.2810401298381664,
301
+ "train_runtime": 5417.1403,
302
+ "train_samples_per_second": 8.959,
303
+ "train_steps_per_second": 0.035
304
  }
305
  ],
306
  "logging_steps": 10,
307
+ "max_steps": 189,
308
  "num_input_tokens_seen": 0,
309
  "num_train_epochs": 1,
310
  "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e370280be22422145d741eb20d38b32314505f586945952ee65047093ae07be1
+ oid sha256:f7513499dfa8cc403e8936c05aa22586d81859c8852cb1c5f413ff049d51a71d
  size 6264
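training_args.bin is the pickled TrainingArguments object saved by the Trainer. A minimal sketch of inspecting it, assuming the Transformers/PyTorch versions listed in the README are installed (needed for torch.load to unpickle the object):

```python
import torch

# Illustrative only: load the pickled TrainingArguments saved alongside the checkpoint.
args = torch.load("training_args.bin")
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```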