RikkiXu committed on
Commit 7762505
1 Parent(s): 3dd25af

Model save

README.md CHANGED
@@ -32,7 +32,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 5e-07
+- learning_rate: 5e-09
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 1.2810401298381664,
-    "train_runtime": 5417.1403,
-    "train_samples": 48530,
-    "train_samples_per_second": 8.959,
-    "train_steps_per_second": 0.035
+    "train_loss": 0.7162073644002279,
+    "train_runtime": 4512.4874,
+    "train_samples": 38445,
+    "train_samples_per_second": 8.52,
+    "train_steps_per_second": 0.033
 }
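The throughput figures in the new `all_results.json` are internally consistent: 38445 samples over a 4512.49 s run is roughly 8.52 samples per second, and 150 optimizer steps (the `max_steps` in `trainer_state.json`) over the same runtime is roughly 0.033 steps per second. A small sketch of that check, assuming the file has been downloaded locally under the same name:

```python
import json

# Recompute the reported throughput from the other fields in all_results.json.
with open("all_results.json") as f:
    results = json.load(f)

samples_per_second = results["train_samples"] / results["train_runtime"]
steps_per_second = 150 / results["train_runtime"]  # 150 = max_steps in trainer_state.json

print(f"{samples_per_second:.2f} samples/s")  # ~8.52, matches train_samples_per_second
print(f"{steps_per_second:.3f} steps/s")      # ~0.033, matches train_steps_per_second
```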
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b008443197fefaac049fe7a0eabc01b0151b73564d92afc7767c1fa8c71e1ca8
+oid sha256:c198eb9df08ec1da45189177d9165731677bcf8abcb6d6938bc58bf75d03977d
 size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c3ff196fe6ec2e2cfd1db6b1aeb3c9df6fa0eb8b2d7025d4ac80bcd6c50ae14
+oid sha256:57087d3701236ddd033204862662fca1d328a1d40449da21fe52584fb50fefff
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:026ec669932b9619a7d776b17104d80d5e127fd258a8759a65b68cadf6158a75
+oid sha256:d22d542aa9e2e706d1339613cd7b76b3452024d5fd48f29b7543bbc9e61fc003
 size 4540532728
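The `.safetensors` entries above are Git LFS pointer files: each stores only the SHA-256 digest (`oid`) and byte size of the actual shard. A minimal sketch of how a downloaded shard could be checked against its pointer, assuming the shard has already been fetched to the working directory:

```python
import hashlib

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the local file's SHA-256 and size match the LFS pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# oid/size taken from the updated pointer for model-00001-of-00003.safetensors above.
print(matches_lfs_pointer(
    "model-00001-of-00003.safetensors",
    "c198eb9df08ec1da45189177d9165731677bcf8abcb6d6938bc58bf75d03977d",
    4943178720,
))
```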
runs/Jun17_01-47-24_n136-112-146/events.out.tfevents.1718560498.n136-112-146.891340.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d3f8e8736abd04ccc11ca702964740dd849bf1b59def5f9d196dc08153f92d7
-size 12362
+oid sha256:dd3d21db7f59f214e4271167668304d02bf7010ba64ca49e16ae58fc95cdcd30
+size 16132
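The file under `runs/` is the TensorBoard event log for this run, also stored via LFS. A sketch of how its scalars could be inspected once the run directory has been pulled locally, assuming the `tensorboard` package is installed; the `train/loss` tag name is an assumption about how the trainer logged its metrics, not something recorded in this commit:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Load every scalar series found in the run directory.
acc = EventAccumulator("runs/Jun17_01-47-24_n136-112-146")
acc.Reload()

print(acc.Tags()["scalars"])             # tags actually present in the event file
for event in acc.Scalars("train/loss"):  # assumed tag name
    print(event.step, event.value)
```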
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 1.2810401298381664,
-    "train_runtime": 5417.1403,
-    "train_samples": 48530,
-    "train_samples_per_second": 8.959,
-    "train_steps_per_second": 0.035
+    "train_loss": 0.7162073644002279,
+    "train_runtime": 4512.4874,
+    "train_samples": 38445,
+    "train_samples_per_second": 8.52,
+    "train_steps_per_second": 0.033
 }
trainer_state.json CHANGED
@@ -1,22 +1,22 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9960474308300395,
+  "epoch": 0.9983361064891847,
   "eval_steps": 500,
-  "global_step": 189,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.01,
-      "grad_norm": 4737.779382861946,
-      "learning_rate": 2.6315789473684208e-08,
-      "logits/chosen": -4.638427734375,
-      "logits/rejected": -4.891327857971191,
-      "logps/chosen": -198.52749633789062,
-      "logps/rejected": -147.3392791748047,
-      "loss": 2.1269,
+      "grad_norm": 1151.6129333090275,
+      "learning_rate": 3.333333333333333e-10,
+      "logits/chosen": -4.106247425079346,
+      "logits/rejected": -4.200438499450684,
+      "logps/chosen": -382.81439208984375,
+      "logps/rejected": -357.65960693359375,
+      "loss": 0.6931,
       "rewards/accuracies": 0.0,
       "rewards/chosen": 0.0,
       "rewards/margins": 0.0,
@@ -24,287 +24,242 @@
       "step": 1
     },
     {
-      "epoch": 0.05,
-      "grad_norm": 3019.619079402316,
-      "learning_rate": 2.631578947368421e-07,
-      "logits/chosen": -4.496801376342773,
-      "logits/rejected": -4.816222190856934,
-      "logps/chosen": -224.27357482910156,
-      "logps/rejected": -168.04739379882812,
-      "loss": 1.9212,
-      "rewards/accuracies": 0.5381944179534912,
-      "rewards/chosen": 0.3541475236415863,
-      "rewards/margins": 0.37169286608695984,
-      "rewards/rejected": -0.017545383423566818,
+      "epoch": 0.07,
+      "grad_norm": 1175.0279383615512,
+      "learning_rate": 3.3333333333333334e-09,
+      "logits/chosen": -4.217203617095947,
+      "logits/rejected": -4.32081413269043,
+      "logps/chosen": -334.6714172363281,
+      "logps/rejected": -313.4322509765625,
+      "loss": 0.7317,
+      "rewards/accuracies": 0.3923611044883728,
+      "rewards/chosen": -0.044815655797719955,
+      "rewards/margins": -0.03612741455435753,
+      "rewards/rejected": -0.008688241243362427,
       "step": 10
     },
     {
-      "epoch": 0.11,
-      "grad_norm": 1863.4195630562826,
-      "learning_rate": 4.999573126145131e-07,
-      "logits/chosen": -4.533459663391113,
-      "logits/rejected": -4.848563194274902,
-      "logps/chosen": -220.4309539794922,
-      "logps/rejected": -180.72413635253906,
-      "loss": 1.1783,
-      "rewards/accuracies": 0.8343750238418579,
-      "rewards/chosen": 5.777209281921387,
-      "rewards/margins": 6.331713676452637,
-      "rewards/rejected": -0.5545047521591187,
+      "epoch": 0.13,
+      "grad_norm": 1253.9538230101048,
+      "learning_rate": 4.983095894354858e-09,
+      "logits/chosen": -4.266427040100098,
+      "logits/rejected": -4.4187798500061035,
+      "logps/chosen": -313.9143371582031,
+      "logps/rejected": -288.782470703125,
+      "loss": 0.732,
+      "rewards/accuracies": 0.4781250059604645,
+      "rewards/chosen": -0.000932177877984941,
+      "rewards/margins": -0.012739461846649647,
+      "rewards/rejected": 0.011807283386588097,
       "step": 20
     },
     {
-      "epoch": 0.16,
-      "grad_norm": 1882.3555396757283,
-      "learning_rate": 4.948524419003415e-07,
-      "logits/chosen": -4.54370641708374,
-      "logits/rejected": -4.812285423278809,
-      "logps/chosen": -213.49411010742188,
-      "logps/rejected": -177.16848754882812,
-      "loss": 1.3518,
-      "rewards/accuracies": 0.8531249761581421,
-      "rewards/chosen": 8.2464017868042,
-      "rewards/margins": 12.621076583862305,
-      "rewards/rejected": -4.3746747970581055,
+      "epoch": 0.2,
+      "grad_norm": 1147.308654200848,
+      "learning_rate": 4.849231551964771e-09,
+      "logits/chosen": -4.301184177398682,
+      "logits/rejected": -4.36545991897583,
+      "logps/chosen": -308.3140563964844,
+      "logps/rejected": -285.6829833984375,
+      "loss": 0.7352,
+      "rewards/accuracies": 0.46562498807907104,
+      "rewards/chosen": -0.018803134560585022,
+      "rewards/margins": 0.008298242464661598,
+      "rewards/rejected": -0.02710137702524662,
       "step": 30
     },
     {
-      "epoch": 0.21,
-      "grad_norm": 1979.651061288252,
-      "learning_rate": 4.81409414945389e-07,
-      "logits/chosen": -4.619187831878662,
-      "logits/rejected": -4.8958845138549805,
-      "logps/chosen": -221.00082397460938,
-      "logps/rejected": -184.62203979492188,
-      "loss": 1.4689,
-      "rewards/accuracies": 0.8343750238418579,
-      "rewards/chosen": 7.427975654602051,
-      "rewards/margins": 15.529205322265625,
-      "rewards/rejected": -8.101228713989258,
+      "epoch": 0.27,
+      "grad_norm": 1152.01480268075,
+      "learning_rate": 4.588719528532341e-09,
+      "logits/chosen": -4.186487674713135,
+      "logits/rejected": -4.270204544067383,
+      "logps/chosen": -332.467041015625,
+      "logps/rejected": -307.530517578125,
+      "loss": 0.722,
+      "rewards/accuracies": 0.5531250238418579,
+      "rewards/chosen": 0.012754167430102825,
+      "rewards/margins": 0.04999501258134842,
+      "rewards/rejected": -0.03724084421992302,
       "step": 40
     },
     {
-      "epoch": 0.26,
-      "grad_norm": 2122.235483955093,
-      "learning_rate": 4.6008601790947314e-07,
-      "logits/chosen": -4.608691215515137,
-      "logits/rejected": -4.925226211547852,
-      "logps/chosen": -210.32058715820312,
-      "logps/rejected": -179.0367431640625,
-      "loss": 1.3821,
-      "rewards/accuracies": 0.859375,
-      "rewards/chosen": 6.428221225738525,
-      "rewards/margins": 16.42898941040039,
-      "rewards/rejected": -10.00076961517334,
+      "epoch": 0.33,
+      "grad_norm": 1161.340877574017,
+      "learning_rate": 4.215604094671834e-09,
+      "logits/chosen": -4.197475910186768,
+      "logits/rejected": -4.366654396057129,
+      "logps/chosen": -333.69940185546875,
+      "logps/rejected": -309.05511474609375,
+      "loss": 0.7263,
+      "rewards/accuracies": 0.512499988079071,
+      "rewards/chosen": 0.022939234972000122,
+      "rewards/margins": -0.01653190515935421,
+      "rewards/rejected": 0.03947114199399948,
       "step": 50
     },
     {
-      "epoch": 0.32,
-      "grad_norm": 1692.2277360562514,
-      "learning_rate": 4.3160839350405605e-07,
-      "logits/chosen": -4.665585994720459,
-      "logits/rejected": -4.9272074699401855,
-      "logps/chosen": -205.7926788330078,
-      "logps/rejected": -178.56011962890625,
-      "loss": 1.3465,
-      "rewards/accuracies": 0.859375,
-      "rewards/chosen": 7.834652900695801,
-      "rewards/margins": 16.5399169921875,
-      "rewards/rejected": -8.705263137817383,
+      "epoch": 0.4,
+      "grad_norm": 1135.5326964714952,
+      "learning_rate": 3.7500000000000005e-09,
+      "logits/chosen": -4.186155796051025,
+      "logits/rejected": -4.269167900085449,
+      "logps/chosen": -323.9843444824219,
+      "logps/rejected": -308.364990234375,
+      "loss": 0.7133,
+      "rewards/accuracies": 0.484375,
+      "rewards/chosen": 0.009113344363868237,
+      "rewards/margins": -0.005971288774162531,
+      "rewards/rejected": 0.015084633603692055,
       "step": 60
     },
     {
-      "epoch": 0.37,
-      "grad_norm": 1810.8868167381333,
-      "learning_rate": 3.9694631307311825e-07,
-      "logits/chosen": -4.6464009284973145,
-      "logits/rejected": -4.913968086242676,
-      "logps/chosen": -207.1618194580078,
-      "logps/rejected": -182.61012268066406,
-      "loss": 1.3564,
-      "rewards/accuracies": 0.8656250238418579,
-      "rewards/chosen": 8.192334175109863,
-      "rewards/margins": 16.401655197143555,
-      "rewards/rejected": -8.209321975708008,
+      "epoch": 0.47,
+      "grad_norm": 1170.598014061488,
+      "learning_rate": 3.2170080817777257e-09,
+      "logits/chosen": -4.14601469039917,
+      "logits/rejected": -4.301178932189941,
+      "logps/chosen": -344.8753967285156,
+      "logps/rejected": -314.6096496582031,
+      "loss": 0.7182,
+      "rewards/accuracies": 0.581250011920929,
+      "rewards/chosen": 0.08767497539520264,
+      "rewards/margins": 0.08200599253177643,
+      "rewards/rejected": 0.005668987520039082,
       "step": 70
     },
     {
-      "epoch": 0.42,
-      "grad_norm": 1395.6821844604426,
-      "learning_rate": 3.572801521931522e-07,
-      "logits/chosen": -4.674800395965576,
-      "logits/rejected": -4.932587623596191,
-      "logps/chosen": -202.7789764404297,
-      "logps/rejected": -184.74395751953125,
-      "loss": 1.312,
-      "rewards/accuracies": 0.846875011920929,
-      "rewards/chosen": 5.3585076332092285,
-      "rewards/margins": 16.119762420654297,
-      "rewards/rejected": -10.761255264282227,
+      "epoch": 0.53,
+      "grad_norm": 1088.822875661876,
+      "learning_rate": 2.6453620722761897e-09,
+      "logits/chosen": -4.220850944519043,
+      "logits/rejected": -4.3867106437683105,
+      "logps/chosen": -334.7141418457031,
+      "logps/rejected": -305.6958923339844,
+      "loss": 0.7175,
+      "rewards/accuracies": 0.515625,
+      "rewards/chosen": 0.06255482137203217,
+      "rewards/margins": 0.04721928387880325,
+      "rewards/rejected": 0.015335534699261189,
       "step": 80
     },
     {
-      "epoch": 0.47,
-      "grad_norm": 1761.719146022038,
-      "learning_rate": 3.139606943986089e-07,
-      "logits/chosen": -4.721759796142578,
-      "logits/rejected": -4.953747272491455,
-      "logps/chosen": -199.81448364257812,
-      "logps/rejected": -178.44004821777344,
-      "loss": 1.3425,
-      "rewards/accuracies": 0.815625011920929,
-      "rewards/chosen": 7.849789619445801,
-      "rewards/margins": 15.739909172058105,
-      "rewards/rejected": -7.890120029449463,
+      "epoch": 0.6,
+      "grad_norm": 1120.1472438016,
+      "learning_rate": 2.0658795558326744e-09,
+      "logits/chosen": -4.226521968841553,
+      "logits/rejected": -4.347161293029785,
+      "logps/chosen": -327.5687561035156,
+      "logps/rejected": -308.19744873046875,
+      "loss": 0.7128,
+      "rewards/accuracies": 0.518750011920929,
+      "rewards/chosen": 0.03935312479734421,
+      "rewards/margins": 0.012340927496552467,
+      "rewards/rejected": 0.02701219543814659,
       "step": 90
     },
     {
-      "epoch": 0.53,
-      "grad_norm": 1641.4466240114464,
-      "learning_rate": 2.684631318687185e-07,
-      "logits/chosen": -4.7313385009765625,
-      "logits/rejected": -4.984685897827148,
-      "logps/chosen": -213.2564239501953,
-      "logps/rejected": -190.69088745117188,
-      "loss": 1.3623,
-      "rewards/accuracies": 0.862500011920929,
-      "rewards/chosen": 5.4120659828186035,
-      "rewards/margins": 16.620161056518555,
-      "rewards/rejected": -11.208093643188477,
+      "epoch": 0.67,
+      "grad_norm": 1147.49194388717,
+      "learning_rate": 1.5098005849021078e-09,
+      "logits/chosen": -4.208071708679199,
+      "logits/rejected": -4.417771339416504,
+      "logps/chosen": -320.9470520019531,
+      "logps/rejected": -290.018310546875,
+      "loss": 0.712,
+      "rewards/accuracies": 0.5218750238418579,
+      "rewards/chosen": 0.060337893664836884,
+      "rewards/margins": 0.03545590117573738,
+      "rewards/rejected": 0.0248819962143898,
       "step": 100
     },
     {
-      "epoch": 0.58,
-      "grad_norm": 1346.9601711684072,
-      "learning_rate": 2.2233682952712483e-07,
-      "logits/chosen": -4.668034553527832,
-      "logits/rejected": -4.953825950622559,
-      "logps/chosen": -216.8499298095703,
-      "logps/rejected": -186.10470581054688,
-      "loss": 1.1234,
-      "rewards/accuracies": 0.856249988079071,
-      "rewards/chosen": 7.529428005218506,
-      "rewards/margins": 15.865753173828125,
-      "rewards/rejected": -8.336324691772461,
+      "epoch": 0.73,
+      "grad_norm": 1131.801124941311,
+      "learning_rate": 1.0071035207430352e-09,
+      "logits/chosen": -4.222798824310303,
+      "logits/rejected": -4.275721549987793,
+      "logps/chosen": -315.00030517578125,
+      "logps/rejected": -303.7385559082031,
+      "loss": 0.7022,
+      "rewards/accuracies": 0.543749988079071,
+      "rewards/chosen": 0.08098876476287842,
+      "rewards/margins": 0.037886131554841995,
+      "rewards/rejected": 0.04310264065861702,
       "step": 110
     },
     {
-      "epoch": 0.63,
-      "grad_norm": 1999.3900490416042,
-      "learning_rate": 1.7715256327766884e-07,
-      "logits/chosen": -4.720789909362793,
-      "logits/rejected": -5.025943279266357,
-      "logps/chosen": -207.7978973388672,
-      "logps/rejected": -178.0445098876953,
-      "loss": 1.1185,
-      "rewards/accuracies": 0.859375,
-      "rewards/chosen": 8.364091873168945,
-      "rewards/margins": 16.011329650878906,
-      "rewards/rejected": -7.647237300872803,
+      "epoch": 0.8,
+      "grad_norm": 1201.8232728734756,
+      "learning_rate": 5.848888922025553e-10,
+      "logits/chosen": -4.147335052490234,
+      "logits/rejected": -4.3030242919921875,
+      "logps/chosen": -339.2781677246094,
+      "logps/rejected": -313.5749206542969,
+      "loss": 0.7129,
+      "rewards/accuracies": 0.543749988079071,
+      "rewards/chosen": 0.08869560062885284,
+      "rewards/margins": 0.048744406551122665,
+      "rewards/rejected": 0.03995119035243988,
       "step": 120
     },
     {
-      "epoch": 0.69,
-      "grad_norm": 1568.9083661238265,
-      "learning_rate": 1.3444902911492174e-07,
-      "logits/chosen": -4.702408790588379,
-      "logits/rejected": -4.98063325881958,
-      "logps/chosen": -215.88174438476562,
-      "logps/rejected": -188.39645385742188,
-      "loss": 1.2748,
-      "rewards/accuracies": 0.8531249761581421,
-      "rewards/chosen": 7.107934474945068,
-      "rewards/margins": 16.361108779907227,
-      "rewards/rejected": -9.253173828125,
+      "epoch": 0.87,
+      "grad_norm": 1182.4206440855232,
+      "learning_rate": 2.659183991914696e-10,
+      "logits/chosen": -4.173482418060303,
+      "logits/rejected": -4.367284297943115,
+      "logps/chosen": -324.4493713378906,
+      "logps/rejected": -305.2012634277344,
+      "loss": 0.7,
+      "rewards/accuracies": 0.5062500238418579,
+      "rewards/chosen": 0.08546491712331772,
+      "rewards/margins": 0.01908993348479271,
+      "rewards/rejected": 0.06637498736381531,
       "step": 130
     },
     {
-      "epoch": 0.74,
-      "grad_norm": 1390.666498149427,
-      "learning_rate": 9.56804446775518e-08,
-      "logits/chosen": -4.6129560470581055,
-      "logits/rejected": -4.923257350921631,
-      "logps/chosen": -208.4700469970703,
-      "logps/rejected": -178.78623962402344,
-      "loss": 1.0987,
-      "rewards/accuracies": 0.859375,
-      "rewards/chosen": 6.485724449157715,
-      "rewards/margins": 17.193899154663086,
-      "rewards/rejected": -10.708174705505371,
+      "epoch": 0.93,
+      "grad_norm": 1158.9214495930962,
+      "learning_rate": 6.738782355044049e-11,
+      "logits/chosen": -4.290203094482422,
+      "logits/rejected": -4.35637092590332,
+      "logps/chosen": -308.0101318359375,
+      "logps/rejected": -297.37701416015625,
+      "loss": 0.7082,
+      "rewards/accuracies": 0.5406249761581421,
+      "rewards/chosen": 0.12010886520147324,
+      "rewards/margins": 0.05074785277247429,
+      "rewards/rejected": 0.06936100870370865,
       "step": 140
     },
     {
-      "epoch": 0.79,
-      "grad_norm": 1296.5821049110084,
-      "learning_rate": 6.216702761078166e-08,
-      "logits/chosen": -4.699868202209473,
-      "logits/rejected": -4.9864583015441895,
-      "logps/chosen": -196.650146484375,
-      "logps/rejected": -168.93551635742188,
-      "loss": 1.0856,
-      "rewards/accuracies": 0.8968750238418579,
-      "rewards/chosen": 7.758223056793213,
-      "rewards/margins": 17.15636444091797,
-      "rewards/rejected": -9.398139953613281,
+      "epoch": 1.0,
+      "grad_norm": 1208.2615961890754,
+      "learning_rate": 0.0,
+      "logits/chosen": -4.257304668426514,
+      "logits/rejected": -4.3310017585754395,
+      "logps/chosen": -312.7613830566406,
+      "logps/rejected": -300.3546447753906,
+      "loss": 0.7025,
+      "rewards/accuracies": 0.5562499761581421,
+      "rewards/chosen": 0.10821112245321274,
+      "rewards/margins": 0.047078292816877365,
+      "rewards/rejected": 0.06113281846046448,
       "step": 150
     },
-    {
-      "epoch": 0.84,
-      "grad_norm": 1769.0071097352081,
-      "learning_rate": 3.5050037137906885e-08,
-      "logits/chosen": -4.634187698364258,
-      "logits/rejected": -4.958773612976074,
-      "logps/chosen": -211.03591918945312,
-      "logps/rejected": -176.72067260742188,
-      "loss": 1.1749,
-      "rewards/accuracies": 0.8531249761581421,
-      "rewards/chosen": 8.515033721923828,
-      "rewards/margins": 16.594724655151367,
-      "rewards/rejected": -8.079689979553223,
-      "step": 160
-    },
-    {
-      "epoch": 0.9,
-      "grad_norm": 1606.699013433802,
-      "learning_rate": 1.5252909846235894e-08,
-      "logits/chosen": -4.62954044342041,
-      "logits/rejected": -4.913142204284668,
-      "logps/chosen": -209.8083953857422,
-      "logps/rejected": -184.52127075195312,
-      "loss": 1.2059,
-      "rewards/accuracies": 0.9125000238418579,
-      "rewards/chosen": 9.714839935302734,
-      "rewards/margins": 18.106443405151367,
-      "rewards/rejected": -8.391606330871582,
-      "step": 170
-    },
-    {
-      "epoch": 0.95,
-      "grad_norm": 1242.5060745172418,
-      "learning_rate": 3.4498131616493565e-09,
-      "logits/chosen": -4.616083145141602,
-      "logits/rejected": -4.87780237197876,
-      "logps/chosen": -215.27685546875,
-      "logps/rejected": -193.55332946777344,
-      "loss": 1.1132,
-      "rewards/accuracies": 0.8125,
-      "rewards/chosen": 6.19677209854126,
-      "rewards/margins": 13.694157600402832,
-      "rewards/rejected": -7.497385501861572,
-      "step": 180
-    },
     {
       "epoch": 1.0,
-      "step": 189,
+      "step": 150,
       "total_flos": 0.0,
-      "train_loss": 1.2810401298381664,
-      "train_runtime": 5417.1403,
-      "train_samples_per_second": 8.959,
-      "train_steps_per_second": 0.035
+      "train_loss": 0.7162073644002279,
+      "train_runtime": 4512.4874,
+      "train_samples_per_second": 8.52,
+      "train_steps_per_second": 0.033
     }
   ],
   "logging_steps": 10,
-  "max_steps": 189,
+  "max_steps": 150,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bb6c258154054c1d311e020aa1a0b2699cd9e54ea2ade0bdf6796fef174dbcd
+oid sha256:a9928d76a7e7521b67335c6d05e83d75eb13e444587e18aa0436ce552c75b5fd
 size 6328