RikkiXu committed on
Commit
796b25f
1 Parent(s): 6b56db6

Model save

Browse files
README.md CHANGED
@@ -32,7 +32,7 @@ More information needed
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
- - learning_rate: 5e-07
36
  - train_batch_size: 8
37
  - eval_batch_size: 8
38
  - seed: 42
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
+ - learning_rate: 1e-08
36
  - train_batch_size: 8
37
  - eval_batch_size: 8
38
  - seed: 42
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.9975380127246564,
4
- "train_runtime": 5482.1546,
5
- "train_samples": 49998,
6
- "train_samples_per_second": 9.12,
7
- "train_steps_per_second": 0.036
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6950656716028849,
4
+ "train_runtime": 4446.5407,
5
+ "train_samples": 38445,
6
+ "train_samples_per_second": 8.646,
7
+ "train_steps_per_second": 0.034
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47640a0478f2bcb7154d26393dc9ccd914f431fd8d23c33ec72d35f3d2adafe7
3
  size 4943178720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:babd17a2e7814d83995456444b46a193dadee60ab7f7c1b37c860038f0952005
3
  size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52f69274e88b49d66a73ddb87507cd1bc38954b506db2290b371231064d4f84e
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a43edf0b9ec78db80fffc7ca6e3f6c0a1fe224c9898b090a4e4a3b7f5961b44
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd5168e074183b468a8f1504e1ee15cf17cb86551434001e60a29c90de0ac2d5
3
  size 4540532728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f79f70bd9e954c826d33d1d7340ad20c07ac0dc88ee52022ef706382edcab32
3
  size 4540532728
runs/Jun11_20-45-25_n136-100-194/events.out.tfevents.1718110156.n136-100-194.3719830.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45ea4bec15ae790b1bcd703b701c8e94489e7b95a34a4b10b5b14cf736f95e55
3
- size 12315
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f85dfc6691a9f5ae892c7f4e4c4e0864a2e976e39837a0e7496c60ad0f111a48
3
+ size 16085
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.9975380127246564,
4
- "train_runtime": 5482.1546,
5
- "train_samples": 49998,
6
- "train_samples_per_second": 9.12,
7
- "train_steps_per_second": 0.036
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6950656716028849,
4
+ "train_runtime": 4446.5407,
5
+ "train_samples": 38445,
6
+ "train_samples_per_second": 8.646,
7
+ "train_steps_per_second": 0.034
8
  }
trainer_state.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9974424552429667,
5
  "eval_steps": 500,
6
- "global_step": 195,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
- "grad_norm": 1310.024749740419,
14
- "learning_rate": 2.5e-08,
15
- "logits/chosen": -5.0504608154296875,
16
- "logits/rejected": -5.35328483581543,
17
- "logps/chosen": -242.7239990234375,
18
- "logps/rejected": -185.90835571289062,
19
- "loss": 0.6893,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
@@ -24,302 +24,242 @@
24
  "step": 1
25
  },
26
  {
27
- "epoch": 0.05,
28
- "grad_norm": 1343.8700325036616,
29
- "learning_rate": 2.5e-07,
30
- "logits/chosen": -4.959235191345215,
31
- "logits/rejected": -5.051504135131836,
32
- "logps/chosen": -226.43630981445312,
33
- "logps/rejected": -216.47547912597656,
34
- "loss": 0.7205,
35
- "rewards/accuracies": 0.4479166567325592,
36
- "rewards/chosen": 0.07974544167518616,
37
- "rewards/margins": 0.013408761471509933,
38
- "rewards/rejected": 0.06633666902780533,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.1,
43
- "grad_norm": 1443.7667771719773,
44
- "learning_rate": 5e-07,
45
- "logits/chosen": -4.906929969787598,
46
- "logits/rejected": -5.0118937492370605,
47
- "logps/chosen": -240.65188598632812,
48
- "logps/rejected": -220.84378051757812,
49
- "loss": 0.6926,
50
- "rewards/accuracies": 0.612500011920929,
51
- "rewards/chosen": 0.7429171204566956,
52
- "rewards/margins": 1.1278517246246338,
53
- "rewards/rejected": -0.38493460416793823,
54
  "step": 20
55
  },
56
  {
57
- "epoch": 0.15,
58
- "grad_norm": 1641.6770420153719,
59
- "learning_rate": 4.959823971496574e-07,
60
- "logits/chosen": -4.913812637329102,
61
- "logits/rejected": -5.012935638427734,
62
- "logps/chosen": -238.8269805908203,
63
- "logps/rejected": -228.05404663085938,
64
- "loss": 0.8116,
65
- "rewards/accuracies": 0.6343749761581421,
66
- "rewards/chosen": 1.8061437606811523,
67
- "rewards/margins": 4.523256301879883,
68
- "rewards/rejected": -2.7171127796173096,
69
  "step": 30
70
  },
71
  {
72
- "epoch": 0.2,
73
- "grad_norm": 1382.4291689510926,
74
- "learning_rate": 4.840587176599343e-07,
75
- "logits/chosen": -4.964416980743408,
76
- "logits/rejected": -5.0027852058410645,
77
- "logps/chosen": -249.1742706298828,
78
- "logps/rejected": -235.87576293945312,
79
- "loss": 0.9983,
80
- "rewards/accuracies": 0.5531250238418579,
81
- "rewards/chosen": 1.3685696125030518,
82
- "rewards/margins": 4.053561210632324,
83
- "rewards/rejected": -2.6849913597106934,
84
  "step": 40
85
  },
86
  {
87
- "epoch": 0.26,
88
- "grad_norm": 1428.1508779981239,
89
- "learning_rate": 4.646121984004665e-07,
90
- "logits/chosen": -4.990395545959473,
91
- "logits/rejected": -5.134562015533447,
92
- "logps/chosen": -251.7528076171875,
93
- "logps/rejected": -226.17306518554688,
94
- "loss": 0.9987,
95
- "rewards/accuracies": 0.6468750238418579,
96
- "rewards/chosen": 2.2698659896850586,
97
- "rewards/margins": 5.616934299468994,
98
- "rewards/rejected": -3.3470687866210938,
99
  "step": 50
100
  },
101
  {
102
- "epoch": 0.31,
103
- "grad_norm": 1429.7364912941882,
104
- "learning_rate": 4.3826786650090273e-07,
105
- "logits/chosen": -5.023388385772705,
106
- "logits/rejected": -5.144254684448242,
107
- "logps/chosen": -250.6563720703125,
108
- "logps/rejected": -241.12484741210938,
109
- "loss": 0.993,
110
- "rewards/accuracies": 0.5843750238418579,
111
- "rewards/chosen": 1.217611312866211,
112
- "rewards/margins": 6.1895647048950195,
113
- "rewards/rejected": -4.97195291519165,
114
  "step": 60
115
  },
116
  {
117
- "epoch": 0.36,
118
- "grad_norm": 1385.9054301583744,
119
- "learning_rate": 4.058724504646834e-07,
120
- "logits/chosen": -4.992190361022949,
121
- "logits/rejected": -5.075345039367676,
122
- "logps/chosen": -256.97406005859375,
123
- "logps/rejected": -242.94003295898438,
124
- "loss": 1.1539,
125
- "rewards/accuracies": 0.606249988079071,
126
- "rewards/chosen": 2.1734097003936768,
127
- "rewards/margins": 5.453003883361816,
128
- "rewards/rejected": -3.2795944213867188,
129
  "step": 70
130
  },
131
  {
132
- "epoch": 0.41,
133
- "grad_norm": 1267.3737422156325,
134
- "learning_rate": 3.6846716561824967e-07,
135
- "logits/chosen": -5.066686630249023,
136
- "logits/rejected": -5.165375709533691,
137
- "logps/chosen": -246.781982421875,
138
- "logps/rejected": -232.3020477294922,
139
- "loss": 1.1127,
140
- "rewards/accuracies": 0.5562499761581421,
141
- "rewards/chosen": 2.182149887084961,
142
- "rewards/margins": 6.110042095184326,
143
- "rewards/rejected": -3.927891492843628,
144
  "step": 80
145
  },
146
  {
147
- "epoch": 0.46,
148
- "grad_norm": 1414.9882610729042,
149
- "learning_rate": 3.272542485937368e-07,
150
- "logits/chosen": -5.056512355804443,
151
- "logits/rejected": -5.19997501373291,
152
- "logps/chosen": -236.23886108398438,
153
- "logps/rejected": -219.4969940185547,
154
- "loss": 1.1651,
155
- "rewards/accuracies": 0.59375,
156
- "rewards/chosen": 2.3071811199188232,
157
- "rewards/margins": 4.593169212341309,
158
- "rewards/rejected": -2.2859878540039062,
159
  "step": 90
160
  },
161
  {
162
- "epoch": 0.51,
163
- "grad_norm": 1730.7459110414102,
164
- "learning_rate": 2.8355831645441387e-07,
165
- "logits/chosen": -5.051321506500244,
166
- "logits/rejected": -5.197503089904785,
167
- "logps/chosen": -245.94680786132812,
168
- "logps/rejected": -224.7979278564453,
169
- "loss": 1.1049,
170
- "rewards/accuracies": 0.643750011920929,
171
- "rewards/chosen": 2.0447471141815186,
172
- "rewards/margins": 3.989384412765503,
173
- "rewards/rejected": -1.9446370601654053,
174
  "step": 100
175
  },
176
  {
177
- "epoch": 0.56,
178
- "grad_norm": 1376.721155787266,
179
- "learning_rate": 2.3878379241237134e-07,
180
- "logits/chosen": -5.05279541015625,
181
- "logits/rejected": -5.2380499839782715,
182
- "logps/chosen": -231.46408081054688,
183
- "logps/rejected": -221.2686309814453,
184
- "loss": 1.0653,
185
- "rewards/accuracies": 0.637499988079071,
186
- "rewards/chosen": 2.9433412551879883,
187
- "rewards/margins": 7.433489799499512,
188
- "rewards/rejected": -4.490148544311523,
189
  "step": 110
190
  },
191
  {
192
- "epoch": 0.61,
193
- "grad_norm": 1298.5481767381427,
194
- "learning_rate": 1.9436976651092142e-07,
195
- "logits/chosen": -4.989577293395996,
196
- "logits/rejected": -5.143449306488037,
197
- "logps/chosen": -250.3534698486328,
198
- "logps/rejected": -237.04074096679688,
199
- "loss": 1.0694,
200
- "rewards/accuracies": 0.6343749761581421,
201
- "rewards/chosen": 2.3243861198425293,
202
- "rewards/margins": 8.470600128173828,
203
- "rewards/rejected": -6.146214485168457,
204
  "step": 120
205
  },
206
  {
207
- "epoch": 0.66,
208
- "grad_norm": 1456.9702892975145,
209
- "learning_rate": 1.517437420865191e-07,
210
- "logits/chosen": -5.036610126495361,
211
- "logits/rejected": -5.181552886962891,
212
- "logps/chosen": -234.2519073486328,
213
- "logps/rejected": -226.05050659179688,
214
- "loss": 1.1374,
215
- "rewards/accuracies": 0.612500011920929,
216
- "rewards/chosen": 2.612969160079956,
217
- "rewards/margins": 6.129396915435791,
218
- "rewards/rejected": -3.516427516937256,
219
  "step": 130
220
  },
221
  {
222
- "epoch": 0.72,
223
- "grad_norm": 1414.11944634508,
224
- "learning_rate": 1.1227575463697439e-07,
225
- "logits/chosen": -5.011117458343506,
226
- "logits/rejected": -5.0677995681762695,
227
- "logps/chosen": -246.2405242919922,
228
- "logps/rejected": -240.97647094726562,
229
- "loss": 1.0012,
230
- "rewards/accuracies": 0.6625000238418579,
231
- "rewards/chosen": 2.1312901973724365,
232
- "rewards/margins": 6.49268102645874,
233
- "rewards/rejected": -4.361390590667725,
234
  "step": 140
235
  },
236
  {
237
- "epoch": 0.77,
238
- "grad_norm": 1391.6252979817953,
239
- "learning_rate": 7.723433775328384e-08,
240
- "logits/chosen": -5.031737327575684,
241
- "logits/rejected": -5.141982078552246,
242
- "logps/chosen": -247.31640625,
243
- "logps/rejected": -245.01284790039062,
244
- "loss": 1.0468,
245
- "rewards/accuracies": 0.6625000238418579,
246
- "rewards/chosen": 3.413778781890869,
247
- "rewards/margins": 8.60617446899414,
248
- "rewards/rejected": -5.19239616394043,
249
  "step": 150
250
  },
251
- {
252
- "epoch": 0.82,
253
- "grad_norm": 1305.4800329449993,
254
- "learning_rate": 4.774575140626316e-08,
255
- "logits/chosen": -4.959289073944092,
256
- "logits/rejected": -5.040767192840576,
257
- "logps/chosen": -253.7027587890625,
258
- "logps/rejected": -250.91659545898438,
259
- "loss": 0.9992,
260
- "rewards/accuracies": 0.6781250238418579,
261
- "rewards/chosen": 3.046278476715088,
262
- "rewards/margins": 8.344175338745117,
263
- "rewards/rejected": -5.297896862030029,
264
- "step": 160
265
- },
266
- {
267
- "epoch": 0.87,
268
- "grad_norm": 1228.1104796269808,
269
- "learning_rate": 2.475778302439524e-08,
270
- "logits/chosen": -5.096159934997559,
271
- "logits/rejected": -5.178959369659424,
272
- "logps/chosen": -251.2628631591797,
273
- "logps/rejected": -233.06857299804688,
274
- "loss": 1.0057,
275
- "rewards/accuracies": 0.5843750238418579,
276
- "rewards/chosen": 2.824694871902466,
277
- "rewards/margins": 6.200740814208984,
278
- "rewards/rejected": -3.3760459423065186,
279
- "step": 170
280
- },
281
- {
282
- "epoch": 0.92,
283
- "grad_norm": 1348.827014256151,
284
- "learning_rate": 9.009284826036689e-09,
285
- "logits/chosen": -4.995651721954346,
286
- "logits/rejected": -5.102165222167969,
287
- "logps/chosen": -237.61990356445312,
288
- "logps/rejected": -232.7886962890625,
289
- "loss": 0.9321,
290
- "rewards/accuracies": 0.659375011920929,
291
- "rewards/chosen": 2.423119068145752,
292
- "rewards/margins": 4.8792009353637695,
293
- "rewards/rejected": -2.4560813903808594,
294
- "step": 180
295
- },
296
- {
297
- "epoch": 0.97,
298
- "grad_norm": 1117.1672982866971,
299
- "learning_rate": 1.0064265011902328e-09,
300
- "logits/chosen": -5.071808815002441,
301
- "logits/rejected": -5.110179901123047,
302
- "logps/chosen": -236.14224243164062,
303
- "logps/rejected": -233.5693359375,
304
- "loss": 0.9891,
305
- "rewards/accuracies": 0.640625,
306
- "rewards/chosen": 1.8652112483978271,
307
- "rewards/margins": 5.820201873779297,
308
- "rewards/rejected": -3.9549899101257324,
309
- "step": 190
310
- },
311
  {
312
  "epoch": 1.0,
313
- "step": 195,
314
  "total_flos": 0.0,
315
- "train_loss": 0.9975380127246564,
316
- "train_runtime": 5482.1546,
317
- "train_samples_per_second": 9.12,
318
- "train_steps_per_second": 0.036
319
  }
320
  ],
321
  "logging_steps": 10,
322
- "max_steps": 195,
323
  "num_input_tokens_seen": 0,
324
  "num_train_epochs": 1,
325
  "save_steps": 100,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9983361064891847,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
+ "grad_norm": 1142.1729750161728,
14
+ "learning_rate": 6.666666666666666e-10,
15
+ "logits/chosen": -4.106247425079346,
16
+ "logits/rejected": -4.200438499450684,
17
+ "logps/chosen": -382.81439208984375,
18
+ "logps/rejected": -357.65960693359375,
19
+ "loss": 0.685,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
22
  "rewards/margins": 0.0,
 
24
  "step": 1
25
  },
26
  {
27
+ "epoch": 0.07,
28
+ "grad_norm": 1162.0154294843069,
29
+ "learning_rate": 6.666666666666667e-09,
30
+ "logits/chosen": -4.2175726890563965,
31
+ "logits/rejected": -4.321321487426758,
32
+ "logps/chosen": -334.61383056640625,
33
+ "logps/rejected": -313.4597473144531,
34
+ "loss": 0.7288,
35
+ "rewards/accuracies": 0.4756944477558136,
36
+ "rewards/chosen": -0.016012493520975113,
37
+ "rewards/margins": 0.006440857890993357,
38
+ "rewards/rejected": -0.022453350946307182,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.13,
43
+ "grad_norm": 1254.20259356522,
44
+ "learning_rate": 9.966191788709716e-09,
45
+ "logits/chosen": -4.266871452331543,
46
+ "logits/rejected": -4.419375896453857,
47
+ "logps/chosen": -313.91156005859375,
48
+ "logps/rejected": -288.8208923339844,
49
+ "loss": 0.7239,
50
+ "rewards/accuracies": 0.512499988079071,
51
+ "rewards/chosen": 0.0004551798047032207,
52
+ "rewards/margins": 0.007867029868066311,
53
+ "rewards/rejected": -0.0074118501506745815,
54
  "step": 20
55
  },
56
  {
57
+ "epoch": 0.2,
58
+ "grad_norm": 1168.2123312853032,
59
+ "learning_rate": 9.698463103929542e-09,
60
+ "logits/chosen": -4.302128791809082,
61
+ "logits/rejected": -4.365870475769043,
62
+ "logps/chosen": -308.2377014160156,
63
+ "logps/rejected": -285.6295471191406,
64
+ "loss": 0.7344,
65
+ "rewards/accuracies": 0.5249999761581421,
66
+ "rewards/chosen": 0.019400831311941147,
67
+ "rewards/margins": 0.019786948338150978,
68
+ "rewards/rejected": -0.0003861159202642739,
69
  "step": 30
70
  },
71
  {
72
+ "epoch": 0.27,
73
+ "grad_norm": 1099.047513823573,
74
+ "learning_rate": 9.177439057064682e-09,
75
+ "logits/chosen": -4.187483310699463,
76
+ "logits/rejected": -4.270766735076904,
77
+ "logps/chosen": -332.38433837890625,
78
+ "logps/rejected": -307.4942321777344,
79
+ "loss": 0.7115,
80
+ "rewards/accuracies": 0.5406249761581421,
81
+ "rewards/chosen": 0.0541040301322937,
82
+ "rewards/margins": 0.07320869714021683,
83
+ "rewards/rejected": -0.01910465955734253,
84
  "step": 40
85
  },
86
  {
87
+ "epoch": 0.33,
88
+ "grad_norm": 1141.2520247434832,
89
+ "learning_rate": 8.431208189343668e-09,
90
+ "logits/chosen": -4.198305606842041,
91
+ "logits/rejected": -4.367269992828369,
92
+ "logps/chosen": -333.6199645996094,
93
+ "logps/rejected": -308.95989990234375,
94
+ "loss": 0.7163,
95
+ "rewards/accuracies": 0.4906249940395355,
96
+ "rewards/chosen": 0.06264184415340424,
97
+ "rewards/margins": -0.024443484842777252,
98
+ "rewards/rejected": 0.08708532154560089,
99
  "step": 50
100
  },
101
  {
102
+ "epoch": 0.4,
103
+ "grad_norm": 1099.3679350302498,
104
+ "learning_rate": 7.500000000000001e-09,
105
+ "logits/chosen": -4.187924385070801,
106
+ "logits/rejected": -4.2703022956848145,
107
+ "logps/chosen": -323.7719421386719,
108
+ "logps/rejected": -308.23748779296875,
109
+ "loss": 0.7118,
110
+ "rewards/accuracies": 0.515625,
111
+ "rewards/chosen": 0.11530591547489166,
112
+ "rewards/margins": 0.03647974878549576,
113
+ "rewards/rejected": 0.07882615178823471,
114
  "step": 60
115
  },
116
  {
117
+ "epoch": 0.47,
118
+ "grad_norm": 1096.6613245075011,
119
+ "learning_rate": 6.434016163555451e-09,
120
+ "logits/chosen": -4.1484785079956055,
121
+ "logits/rejected": -4.303661346435547,
122
+ "logps/chosen": -344.68658447265625,
123
+ "logps/rejected": -314.47064208984375,
124
+ "loss": 0.6973,
125
+ "rewards/accuracies": 0.5625,
126
+ "rewards/chosen": 0.1820925623178482,
127
+ "rewards/margins": 0.10691970586776733,
128
+ "rewards/rejected": 0.07517284899950027,
129
  "step": 70
130
  },
131
  {
132
+ "epoch": 0.53,
133
+ "grad_norm": 1050.1170453783911,
134
+ "learning_rate": 5.290724144552379e-09,
135
+ "logits/chosen": -4.223504543304443,
136
+ "logits/rejected": -4.3897480964660645,
137
+ "logps/chosen": -334.43511962890625,
138
+ "logps/rejected": -305.484375,
139
+ "loss": 0.6912,
140
+ "rewards/accuracies": 0.559374988079071,
141
+ "rewards/chosen": 0.20204909145832062,
142
+ "rewards/margins": 0.08096315711736679,
143
+ "rewards/rejected": 0.12108592689037323,
144
  "step": 80
145
  },
146
  {
147
+ "epoch": 0.6,
148
+ "grad_norm": 1080.8957995779926,
149
+ "learning_rate": 4.131759111665349e-09,
150
+ "logits/chosen": -4.228762626647949,
151
+ "logits/rejected": -4.349400997161865,
152
+ "logps/chosen": -327.1580810546875,
153
+ "logps/rejected": -307.87689208984375,
154
+ "loss": 0.674,
155
+ "rewards/accuracies": 0.550000011920929,
156
+ "rewards/chosen": 0.24468111991882324,
157
+ "rewards/margins": 0.05739554762840271,
158
+ "rewards/rejected": 0.18728554248809814,
159
  "step": 90
160
  },
161
  {
162
+ "epoch": 0.67,
163
+ "grad_norm": 1086.0982504773885,
164
+ "learning_rate": 3.0196011698042157e-09,
165
+ "logits/chosen": -4.210589408874512,
166
+ "logits/rejected": -4.420603275299072,
167
+ "logps/chosen": -320.5531311035156,
168
+ "logps/rejected": -289.7651062011719,
169
+ "loss": 0.6799,
170
+ "rewards/accuracies": 0.578125,
171
+ "rewards/chosen": 0.2572989761829376,
172
+ "rewards/margins": 0.10579316318035126,
173
+ "rewards/rejected": 0.15150579810142517,
174
  "step": 100
175
  },
176
  {
177
+ "epoch": 0.73,
178
+ "grad_norm": 1114.0041176823654,
179
+ "learning_rate": 2.0142070414860704e-09,
180
+ "logits/chosen": -4.225996971130371,
181
+ "logits/rejected": -4.2789506912231445,
182
+ "logps/chosen": -314.6085205078125,
183
+ "logps/rejected": -303.3541564941406,
184
+ "loss": 0.6851,
185
+ "rewards/accuracies": 0.546875,
186
+ "rewards/chosen": 0.27688390016555786,
187
+ "rewards/margins": 0.041596584022045135,
188
+ "rewards/rejected": 0.23528733849525452,
189
  "step": 110
190
  },
191
  {
192
+ "epoch": 0.8,
193
+ "grad_norm": 1097.8240578626694,
194
+ "learning_rate": 1.1697777844051105e-09,
195
+ "logits/chosen": -4.1503801345825195,
196
+ "logits/rejected": -4.306635856628418,
197
+ "logps/chosen": -338.7808837890625,
198
+ "logps/rejected": -313.2768249511719,
199
+ "loss": 0.6758,
200
+ "rewards/accuracies": 0.5874999761581421,
201
+ "rewards/chosen": 0.3373282849788666,
202
+ "rewards/margins": 0.1483292281627655,
203
+ "rewards/rejected": 0.18899908661842346,
204
  "step": 120
205
  },
206
  {
207
+ "epoch": 0.87,
208
+ "grad_norm": 1113.3294478605874,
209
+ "learning_rate": 5.318367983829391e-10,
210
+ "logits/chosen": -4.178295135498047,
211
+ "logits/rejected": -4.3724284172058105,
212
+ "logps/chosen": -323.9390563964844,
213
+ "logps/rejected": -304.91119384765625,
214
+ "loss": 0.6643,
215
+ "rewards/accuracies": 0.574999988079071,
216
+ "rewards/chosen": 0.340619832277298,
217
+ "rewards/margins": 0.12920674681663513,
218
+ "rewards/rejected": 0.21141307055950165,
219
  "step": 130
220
  },
221
  {
222
+ "epoch": 0.93,
223
+ "grad_norm": 1112.5811910392042,
224
+ "learning_rate": 1.3477564710088098e-10,
225
+ "logits/chosen": -4.293918609619141,
226
+ "logits/rejected": -4.359633445739746,
227
+ "logps/chosen": -307.56317138671875,
228
+ "logps/rejected": -297.0579833984375,
229
+ "loss": 0.6697,
230
+ "rewards/accuracies": 0.596875011920929,
231
+ "rewards/chosen": 0.34358957409858704,
232
+ "rewards/margins": 0.11470258235931396,
233
+ "rewards/rejected": 0.22888696193695068,
234
  "step": 140
235
  },
236
  {
237
+ "epoch": 1.0,
238
+ "grad_norm": 1082.9191655386894,
239
+ "learning_rate": 0.0,
240
+ "logits/chosen": -4.258730411529541,
241
+ "logits/rejected": -4.332475185394287,
242
+ "logps/chosen": -312.3280029296875,
243
+ "logps/rejected": -300.03082275390625,
244
+ "loss": 0.6661,
245
+ "rewards/accuracies": 0.574999988079071,
246
+ "rewards/chosen": 0.3249002993106842,
247
+ "rewards/margins": 0.10186745971441269,
248
+ "rewards/rejected": 0.2230328619480133,
249
  "step": 150
250
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  {
252
  "epoch": 1.0,
253
+ "step": 150,
254
  "total_flos": 0.0,
255
+ "train_loss": 0.6950656716028849,
256
+ "train_runtime": 4446.5407,
257
+ "train_samples_per_second": 8.646,
258
+ "train_steps_per_second": 0.034
259
  }
260
  ],
261
  "logging_steps": 10,
262
+ "max_steps": 150,
263
  "num_input_tokens_seen": 0,
264
  "num_train_epochs": 1,
265
  "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85d84aecf33b97eca429c3cb0dfe5821c88742011a5722789d687f715f23c666
3
  size 6264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16edf0d5fed8efa4c443f7dad2dc6604d5b4bc312040d7965f4bee8ee9215478
3
  size 6264