SamMikaelson commited on
Commit
ef895e2
·
verified ·
1 Parent(s): 049e119

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -29,13 +29,13 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "down_proj",
33
- "up_proj",
34
  "gate_proj",
35
- "v_proj",
36
  "o_proj",
 
 
37
  "q_proj",
38
- "k_proj"
 
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
 
32
  "gate_proj",
 
33
  "o_proj",
34
+ "down_proj",
35
+ "up_proj",
36
  "q_proj",
37
+ "k_proj",
38
+ "v_proj"
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5fe9cd11eab35635b3e40afb66399670464aeb435423f4a450cca4a4ac3626a
3
  size 264308896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43fd707af932d69932231314b4398195f40859ba5286a2a0d59c081c87ccb77b
3
  size 264308896
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c19907c05be97657db095ba64e710f3f3f486bd1ed0860b1bbd6662d2b1e5b1e
3
- size 136089395
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843040dcad4f852768a06d9e594c999813eac472c3f65f7443969358b67099aa
3
+ size 136089907
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b144eceedfa2101476447173dfe3d346a36b7516fd4322dfd02b73b27d9b2310
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c12bd7452a7c59fb850dbff4b78e1bad4cd5c1923c2b1c3ec3478be5c61e7f77
3
  size 14645
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fee0874fa9afae54661807fadac685c3d3f843473b6af99cc43d812ec6e1b36
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1d69bd3f86e65d28af2330480e9d8542f90d84e33fa16c1fd5116fa1a5b336
3
  size 1383
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c1b3d3a192c565272886289c8e8ac137d3c10918ed8d56d761a18da40f9c741
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6025d642d364613419905274d3700452dbaf1fd69dfd7c5f8f57368bc61dfbbe
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0013753266400770184,
6
  "eval_steps": 500,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -64,11 +64,935 @@
64
  "rewards/match_format_exactly/mean": 1.0,
65
  "rewards/match_format_exactly/std": 0.0,
66
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
  ],
69
  "logging_steps": 10,
70
- "max_steps": 4000,
71
- "num_input_tokens_seen": 51474,
72
  "num_train_epochs": 1,
73
  "save_steps": 10,
74
  "stateful_callbacks": {
@@ -78,7 +1002,7 @@
78
  "should_evaluate": false,
79
  "should_log": false,
80
  "should_save": true,
81
- "should_training_stop": false
82
  },
83
  "attributes": {}
84
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.02406821620134782,
6
  "eval_steps": 500,
7
+ "global_step": 350,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
64
  "rewards/match_format_exactly/mean": 1.0,
65
  "rewards/match_format_exactly/std": 0.0,
66
  "step": 20
67
+ },
68
+ {
69
+ "completion_length": 299.05,
70
+ "completions/clipped_ratio": 0.025,
71
+ "completions/max_length": 684.5,
72
+ "completions/max_terminated_length": 582.1,
73
+ "completions/mean_length": 299.05,
74
+ "completions/mean_terminated_length": 268.775,
75
+ "completions/min_length": 12.0,
76
+ "completions/min_terminated_length": 12.0,
77
+ "epoch": 0.0020629899601155273,
78
+ "frac_reward_zero_std": 0.0,
79
+ "grad_norm": 1.6133402585983276,
80
+ "kl": 1.2915641874074937,
81
+ "learning_rate": 4.1428571428571435e-06,
82
+ "loss": 0.0013,
83
+ "num_tokens": 75404.0,
84
+ "reward": 4.5625,
85
+ "reward_std": 0.6107304871082306,
86
+ "rewards/check_coherence/mean": 0.6125,
87
+ "rewards/check_coherence/std": 0.403445702791214,
88
+ "rewards/check_response_quality/mean": 2.2125,
89
+ "rewards/check_response_quality/std": 0.2752987265586853,
90
+ "rewards/match_format_approximately/mean": 0.7375,
91
+ "rewards/match_format_approximately/std": 0.2404700517654419,
92
+ "rewards/match_format_exactly/mean": 1.0,
93
+ "rewards/match_format_exactly/std": 0.0,
94
+ "step": 30
95
+ },
96
+ {
97
+ "completion_length": 347.025,
98
+ "completions/clipped_ratio": 0.075,
99
+ "completions/max_length": 862.6,
100
+ "completions/max_terminated_length": 509.7,
101
+ "completions/mean_length": 347.025,
102
+ "completions/mean_terminated_length": 242.34166870117187,
103
+ "completions/min_length": 58.0,
104
+ "completions/min_terminated_length": 58.0,
105
+ "epoch": 0.0027506532801540367,
106
+ "frac_reward_zero_std": 0.1,
107
+ "grad_norm": 2.190549850463867,
108
+ "kl": 0.8691788390278816,
109
+ "learning_rate": 4.936507936507937e-06,
110
+ "loss": 0.0009,
111
+ "num_tokens": 102089.0,
112
+ "reward": 4.775,
113
+ "reward_std": 0.5918105363845825,
114
+ "rewards/check_coherence/mean": 0.85,
115
+ "rewards/check_coherence/std": 0.345650315284729,
116
+ "rewards/check_response_quality/mean": 2.2,
117
+ "rewards/check_response_quality/std": 0.20347774028778076,
118
+ "rewards/match_format_approximately/mean": 0.725,
119
+ "rewards/match_format_approximately/std": 0.16547005176544188,
120
+ "rewards/match_format_exactly/mean": 1.0,
121
+ "rewards/match_format_exactly/std": 0.0,
122
+ "step": 40
123
+ },
124
+ {
125
+ "completion_length": 278.275,
126
+ "completions/clipped_ratio": 0.025,
127
+ "completions/max_length": 695.3,
128
+ "completions/max_terminated_length": 556.3,
129
+ "completions/mean_length": 278.275,
130
+ "completions/mean_terminated_length": 240.375,
131
+ "completions/min_length": 19.8,
132
+ "completions/min_terminated_length": 19.8,
133
+ "epoch": 0.0034383166001925457,
134
+ "frac_reward_zero_std": 0.0,
135
+ "grad_norm": 1.1459013223648071,
136
+ "kl": 0.7370456486940384,
137
+ "learning_rate": 4.793650793650794e-06,
138
+ "loss": 0.0007,
139
+ "num_tokens": 125192.0,
140
+ "reward": 4.7375,
141
+ "reward_std": 0.9061064124107361,
142
+ "rewards/check_coherence/mean": 0.8625,
143
+ "rewards/check_coherence/std": 0.4861203670501709,
144
+ "rewards/check_response_quality/mean": 2.175,
145
+ "rewards/check_response_quality/std": 0.34289742112159727,
146
+ "rewards/match_format_approximately/mean": 0.725,
147
+ "rewards/match_format_approximately/std": 0.2654700517654419,
148
+ "rewards/match_format_exactly/mean": 0.975,
149
+ "rewards/match_format_exactly/std": 0.05,
150
+ "step": 50
151
+ },
152
+ {
153
+ "completion_length": 127.625,
154
+ "completions/clipped_ratio": 0.0,
155
+ "completions/max_length": 249.7,
156
+ "completions/max_terminated_length": 249.7,
157
+ "completions/mean_length": 127.625,
158
+ "completions/mean_terminated_length": 127.625,
159
+ "completions/min_length": 11.6,
160
+ "completions/min_terminated_length": 11.6,
161
+ "epoch": 0.004125979920231055,
162
+ "frac_reward_zero_std": 0.1,
163
+ "grad_norm": 1.772578477859497,
164
+ "kl": 1.2247508466243744,
165
+ "learning_rate": 4.634920634920635e-06,
166
+ "loss": 0.0012,
167
+ "num_tokens": 140377.0,
168
+ "reward": 5.075,
169
+ "reward_std": 0.583526349067688,
170
+ "rewards/check_coherence/mean": 0.925,
171
+ "rewards/check_coherence/std": 0.4689477920532227,
172
+ "rewards/check_response_quality/mean": 2.325,
173
+ "rewards/check_response_quality/std": 0.17886751294136047,
174
+ "rewards/match_format_approximately/mean": 0.825,
175
+ "rewards/match_format_approximately/std": 0.17886751294136047,
176
+ "rewards/match_format_exactly/mean": 1.0,
177
+ "rewards/match_format_exactly/std": 0.0,
178
+ "step": 60
179
+ },
180
+ {
181
+ "completion_length": 161.4,
182
+ "completions/clipped_ratio": 0.0,
183
+ "completions/max_length": 423.5,
184
+ "completions/max_terminated_length": 423.5,
185
+ "completions/mean_length": 161.4,
186
+ "completions/mean_terminated_length": 161.4,
187
+ "completions/min_length": 17.8,
188
+ "completions/min_terminated_length": 17.8,
189
+ "epoch": 0.004813643240269564,
190
+ "frac_reward_zero_std": 0.0,
191
+ "grad_norm": 3.128584861755371,
192
+ "kl": 0.8261611372232437,
193
+ "learning_rate": 4.476190476190477e-06,
194
+ "loss": 0.0008,
195
+ "num_tokens": 156317.0,
196
+ "reward": 5.1625,
197
+ "reward_std": 0.6853798747062683,
198
+ "rewards/check_coherence/mean": 1.025,
199
+ "rewards/check_coherence/std": 0.4077350258827209,
200
+ "rewards/check_response_quality/mean": 2.3125,
201
+ "rewards/check_response_quality/std": 0.21582483053207396,
202
+ "rewards/match_format_approximately/mean": 0.825,
203
+ "rewards/match_format_approximately/std": 0.2,
204
+ "rewards/match_format_exactly/mean": 1.0,
205
+ "rewards/match_format_exactly/std": 0.0,
206
+ "step": 70
207
+ },
208
+ {
209
+ "completion_length": 120.125,
210
+ "completions/clipped_ratio": 0.0,
211
+ "completions/max_length": 248.3,
212
+ "completions/max_terminated_length": 248.3,
213
+ "completions/mean_length": 120.125,
214
+ "completions/mean_terminated_length": 120.125,
215
+ "completions/min_length": 16.5,
216
+ "completions/min_terminated_length": 16.5,
217
+ "epoch": 0.0055013065603080735,
218
+ "frac_reward_zero_std": 0.4,
219
+ "grad_norm": 2.667182683944702,
220
+ "kl": 0.8017235696315765,
221
+ "learning_rate": 4.317460317460318e-06,
222
+ "loss": 0.0008,
223
+ "num_tokens": 171822.0,
224
+ "reward": 5.4,
225
+ "reward_std": 0.41746232509613035,
226
+ "rewards/check_coherence/mean": 1.2625,
227
+ "rewards/check_coherence/std": 0.2978713572025299,
228
+ "rewards/check_response_quality/mean": 2.3125,
229
+ "rewards/check_response_quality/std": 0.11582483053207397,
230
+ "rewards/match_format_approximately/mean": 0.825,
231
+ "rewards/match_format_approximately/std": 0.1,
232
+ "rewards/match_format_exactly/mean": 1.0,
233
+ "rewards/match_format_exactly/std": 0.0,
234
+ "step": 80
235
+ },
236
+ {
237
+ "completion_length": 164.875,
238
+ "completions/clipped_ratio": 0.0,
239
+ "completions/max_length": 490.9,
240
+ "completions/max_terminated_length": 490.9,
241
+ "completions/mean_length": 164.875,
242
+ "completions/mean_terminated_length": 164.875,
243
+ "completions/min_length": 13.5,
244
+ "completions/min_terminated_length": 13.5,
245
+ "epoch": 0.006188969880346582,
246
+ "frac_reward_zero_std": 0.0,
247
+ "grad_norm": 1.8317680358886719,
248
+ "kl": 0.8454075694084168,
249
+ "learning_rate": 4.158730158730159e-06,
250
+ "loss": 0.0008,
251
+ "num_tokens": 189509.0,
252
+ "reward": 5.325,
253
+ "reward_std": 0.7779198408126831,
254
+ "rewards/check_coherence/mean": 1.15,
255
+ "rewards/check_coherence/std": 0.47320507764816283,
256
+ "rewards/check_response_quality/mean": 2.3375,
257
+ "rewards/check_response_quality/std": 0.21933756470680238,
258
+ "rewards/match_format_approximately/mean": 0.8375,
259
+ "rewards/match_format_approximately/std": 0.21933756470680238,
260
+ "rewards/match_format_exactly/mean": 1.0,
261
+ "rewards/match_format_exactly/std": 0.0,
262
+ "step": 90
263
+ },
264
+ {
265
+ "completion_length": 128.325,
266
+ "completions/clipped_ratio": 0.0,
267
+ "completions/max_length": 322.5,
268
+ "completions/max_terminated_length": 322.5,
269
+ "completions/mean_length": 128.325,
270
+ "completions/mean_terminated_length": 128.325,
271
+ "completions/min_length": 46.3,
272
+ "completions/min_terminated_length": 46.3,
273
+ "epoch": 0.006876633200385091,
274
+ "frac_reward_zero_std": 0.3,
275
+ "grad_norm": 0.003549874061718583,
276
+ "kl": 0.820201675593853,
277
+ "learning_rate": 4.000000000000001e-06,
278
+ "loss": 0.0008,
279
+ "num_tokens": 205998.0,
280
+ "reward": 5.45,
281
+ "reward_std": 0.5343478560447693,
282
+ "rewards/check_coherence/mean": 1.225,
283
+ "rewards/check_coherence/std": 0.3809401035308838,
284
+ "rewards/check_response_quality/mean": 2.3625,
285
+ "rewards/check_response_quality/std": 0.10386751294136047,
286
+ "rewards/match_format_approximately/mean": 0.8625,
287
+ "rewards/match_format_approximately/std": 0.10386751294136047,
288
+ "rewards/match_format_exactly/mean": 1.0,
289
+ "rewards/match_format_exactly/std": 0.0,
290
+ "step": 100
291
+ },
292
+ {
293
+ "completion_length": 147.575,
294
+ "completions/clipped_ratio": 0.0,
295
+ "completions/max_length": 452.6,
296
+ "completions/max_terminated_length": 452.6,
297
+ "completions/mean_length": 147.575,
298
+ "completions/mean_terminated_length": 147.575,
299
+ "completions/min_length": 8.8,
300
+ "completions/min_terminated_length": 8.8,
301
+ "epoch": 0.007564296520423601,
302
+ "frac_reward_zero_std": 0.1,
303
+ "grad_norm": 2.094895124435425,
304
+ "kl": 1.165158998966217,
305
+ "learning_rate": 3.857142857142858e-06,
306
+ "loss": 0.0012,
307
+ "num_tokens": 224325.0,
308
+ "reward": 5.3,
309
+ "reward_std": 0.8748959302902222,
310
+ "rewards/check_coherence/mean": 1.2125,
311
+ "rewards/check_coherence/std": 0.38273502588272096,
312
+ "rewards/check_response_quality/mean": 2.2875,
313
+ "rewards/check_response_quality/std": 0.28898603916168214,
314
+ "rewards/match_format_approximately/mean": 0.825,
315
+ "rewards/match_format_approximately/std": 0.22320507764816283,
316
+ "rewards/match_format_exactly/mean": 0.975,
317
+ "rewards/match_format_exactly/std": 0.05,
318
+ "step": 110
319
+ },
320
+ {
321
+ "completion_length": 93.475,
322
+ "completions/clipped_ratio": 0.0,
323
+ "completions/max_length": 232.9,
324
+ "completions/max_terminated_length": 232.9,
325
+ "completions/mean_length": 93.475,
326
+ "completions/mean_terminated_length": 93.475,
327
+ "completions/min_length": 8.8,
328
+ "completions/min_terminated_length": 8.8,
329
+ "epoch": 0.00825195984046211,
330
+ "frac_reward_zero_std": 0.3,
331
+ "grad_norm": 1.8320742845535278,
332
+ "kl": 0.8784003466367721,
333
+ "learning_rate": 3.6984126984126987e-06,
334
+ "loss": 0.0009,
335
+ "num_tokens": 238872.0,
336
+ "reward": 5.6125,
337
+ "reward_std": 0.5023834943771363,
338
+ "rewards/check_coherence/mean": 1.3875,
339
+ "rewards/check_coherence/std": 0.1978713572025299,
340
+ "rewards/check_response_quality/mean": 2.3625,
341
+ "rewards/check_response_quality/std": 0.18273502588272095,
342
+ "rewards/match_format_approximately/mean": 0.8625,
343
+ "rewards/match_format_approximately/std": 0.18273502588272095,
344
+ "rewards/match_format_exactly/mean": 1.0,
345
+ "rewards/match_format_exactly/std": 0.0,
346
+ "step": 120
347
+ },
348
+ {
349
+ "completion_length": 63.425,
350
+ "completions/clipped_ratio": 0.0,
351
+ "completions/max_length": 180.7,
352
+ "completions/max_terminated_length": 180.7,
353
+ "completions/mean_length": 63.425,
354
+ "completions/mean_terminated_length": 63.425,
355
+ "completions/min_length": 9.8,
356
+ "completions/min_terminated_length": 9.8,
357
+ "epoch": 0.008939623160500619,
358
+ "frac_reward_zero_std": 0.5,
359
+ "grad_norm": 1.388489007949829,
360
+ "kl": 0.855793622136116,
361
+ "learning_rate": 3.53968253968254e-06,
362
+ "loss": 0.0009,
363
+ "num_tokens": 252433.0,
364
+ "reward": 5.75,
365
+ "reward_std": 0.4457427144050598,
366
+ "rewards/check_coherence/mean": 1.4,
367
+ "rewards/check_coherence/std": 0.2,
368
+ "rewards/check_response_quality/mean": 2.425,
369
+ "rewards/check_response_quality/std": 0.12886751294136048,
370
+ "rewards/match_format_approximately/mean": 0.925,
371
+ "rewards/match_format_approximately/std": 0.12886751294136048,
372
+ "rewards/match_format_exactly/mean": 1.0,
373
+ "rewards/match_format_exactly/std": 0.0,
374
+ "step": 130
375
+ },
376
+ {
377
+ "completion_length": 41.35,
378
+ "completions/clipped_ratio": 0.0,
379
+ "completions/max_length": 127.7,
380
+ "completions/max_terminated_length": 127.7,
381
+ "completions/mean_length": 41.35,
382
+ "completions/mean_terminated_length": 41.35,
383
+ "completions/min_length": 7.7,
384
+ "completions/min_terminated_length": 7.7,
385
+ "epoch": 0.009627286480539128,
386
+ "frac_reward_zero_std": 0.0,
387
+ "grad_norm": 2.7645645141601562,
388
+ "kl": 1.5349723994731903,
389
+ "learning_rate": 3.3809523809523814e-06,
390
+ "loss": 0.0015,
391
+ "num_tokens": 264579.0,
392
+ "reward": 5.0875,
393
+ "reward_std": 1.1462337374687195,
394
+ "rewards/check_coherence/mean": 1.15,
395
+ "rewards/check_coherence/std": 0.48867512941360475,
396
+ "rewards/check_response_quality/mean": 2.1875,
397
+ "rewards/check_response_quality/std": 0.3914190471172333,
398
+ "rewards/match_format_approximately/mean": 0.85,
399
+ "rewards/match_format_approximately/std": 0.20773502588272094,
400
+ "rewards/match_format_exactly/mean": 0.9,
401
+ "rewards/match_format_exactly/std": 0.1154700517654419,
402
+ "step": 140
403
+ },
404
+ {
405
+ "completion_length": 46.625,
406
+ "completions/clipped_ratio": 0.0,
407
+ "completions/max_length": 135.3,
408
+ "completions/max_terminated_length": 135.3,
409
+ "completions/mean_length": 46.625,
410
+ "completions/mean_terminated_length": 46.625,
411
+ "completions/min_length": 10.0,
412
+ "completions/min_terminated_length": 10.0,
413
+ "epoch": 0.010314949800577638,
414
+ "frac_reward_zero_std": 0.4,
415
+ "grad_norm": 0.36925405263900757,
416
+ "kl": 1.2759959518909454,
417
+ "learning_rate": 3.2222222222222227e-06,
418
+ "loss": 0.0013,
419
+ "num_tokens": 278280.0,
420
+ "reward": 5.7,
421
+ "reward_std": 0.4686140716075897,
422
+ "rewards/check_coherence/mean": 1.3875,
423
+ "rewards/check_coherence/std": 0.225,
424
+ "rewards/check_response_quality/mean": 2.4,
425
+ "rewards/check_response_quality/std": 0.15173887014389037,
426
+ "rewards/match_format_approximately/mean": 0.9125,
427
+ "rewards/match_format_approximately/std": 0.13273502588272096,
428
+ "rewards/match_format_exactly/mean": 1.0,
429
+ "rewards/match_format_exactly/std": 0.0,
430
+ "step": 150
431
+ },
432
+ {
433
+ "completion_length": 37.85,
434
+ "completions/clipped_ratio": 0.0,
435
+ "completions/max_length": 108.9,
436
+ "completions/max_terminated_length": 108.9,
437
+ "completions/mean_length": 37.85,
438
+ "completions/mean_terminated_length": 37.85,
439
+ "completions/min_length": 10.3,
440
+ "completions/min_terminated_length": 10.3,
441
+ "epoch": 0.011002613120616147,
442
+ "frac_reward_zero_std": 0.5,
443
+ "grad_norm": 0.003718956606462598,
444
+ "kl": 1.1356925666332245,
445
+ "learning_rate": 3.063492063492064e-06,
446
+ "loss": 0.0011,
447
+ "num_tokens": 289698.0,
448
+ "reward": 5.8625,
449
+ "reward_std": 0.275,
450
+ "rewards/check_coherence/mean": 1.4,
451
+ "rewards/check_coherence/std": 0.2,
452
+ "rewards/check_response_quality/mean": 2.475,
453
+ "rewards/check_response_quality/std": 0.05,
454
+ "rewards/match_format_approximately/mean": 0.9875,
455
+ "rewards/match_format_approximately/std": 0.025,
456
+ "rewards/match_format_exactly/mean": 1.0,
457
+ "rewards/match_format_exactly/std": 0.0,
458
+ "step": 160
459
+ },
460
+ {
461
+ "completion_length": 24.475,
462
+ "completions/clipped_ratio": 0.0,
463
+ "completions/max_length": 58.6,
464
+ "completions/max_terminated_length": 58.6,
465
+ "completions/mean_length": 24.475,
466
+ "completions/mean_terminated_length": 24.475,
467
+ "completions/min_length": 10.1,
468
+ "completions/min_terminated_length": 10.1,
469
+ "epoch": 0.011690276440654656,
470
+ "frac_reward_zero_std": 0.8,
471
+ "grad_norm": 0.0026451745070517063,
472
+ "kl": 1.1320037961006164,
473
+ "learning_rate": 2.9047619047619053e-06,
474
+ "loss": 0.0011,
475
+ "num_tokens": 302601.0,
476
+ "reward": 5.9,
477
+ "reward_std": 0.14574271440505981,
478
+ "rewards/check_coherence/mean": 1.425,
479
+ "rewards/check_coherence/std": 0.10773502588272095,
480
+ "rewards/check_response_quality/mean": 2.4875,
481
+ "rewards/check_response_quality/std": 0.025,
482
+ "rewards/match_format_approximately/mean": 0.9875,
483
+ "rewards/match_format_approximately/std": 0.025,
484
+ "rewards/match_format_exactly/mean": 1.0,
485
+ "rewards/match_format_exactly/std": 0.0,
486
+ "step": 170
487
+ },
488
+ {
489
+ "completion_length": 65.375,
490
+ "completions/clipped_ratio": 0.025,
491
+ "completions/max_length": 216.0,
492
+ "completions/max_terminated_length": 67.0,
493
+ "completions/mean_length": 65.375,
494
+ "completions/mean_terminated_length": 26.866666793823242,
495
+ "completions/min_length": 9.4,
496
+ "completions/min_terminated_length": 9.4,
497
+ "epoch": 0.012377939760693164,
498
+ "frac_reward_zero_std": 0.3,
499
+ "grad_norm": 2.4312984943389893,
500
+ "kl": 1.0950138330459596,
501
+ "learning_rate": 2.7460317460317466e-06,
502
+ "loss": 0.0011,
503
+ "num_tokens": 317912.0,
504
+ "reward": 5.775,
505
+ "reward_std": 0.3957427144050598,
506
+ "rewards/check_coherence/mean": 1.4,
507
+ "rewards/check_coherence/std": 0.2,
508
+ "rewards/check_response_quality/mean": 2.4375,
509
+ "rewards/check_response_quality/std": 0.10386751294136047,
510
+ "rewards/match_format_approximately/mean": 0.9375,
511
+ "rewards/match_format_approximately/std": 0.10386751294136047,
512
+ "rewards/match_format_exactly/mean": 1.0,
513
+ "rewards/match_format_exactly/std": 0.0,
514
+ "step": 180
515
+ },
516
+ {
517
+ "completion_length": 16.275,
518
+ "completions/clipped_ratio": 0.0,
519
+ "completions/max_length": 25.2,
520
+ "completions/max_terminated_length": 25.2,
521
+ "completions/mean_length": 16.275,
522
+ "completions/mean_terminated_length": 16.275,
523
+ "completions/min_length": 9.8,
524
+ "completions/min_terminated_length": 9.8,
525
+ "epoch": 0.013065603080731673,
526
+ "frac_reward_zero_std": 0.6,
527
+ "grad_norm": 0.004265956114977598,
528
+ "kl": 1.1890327751636505,
529
+ "learning_rate": 2.587301587301588e-06,
530
+ "loss": 0.0012,
531
+ "num_tokens": 332055.0,
532
+ "reward": 5.8875,
533
+ "reward_std": 0.18273502588272095,
534
+ "rewards/check_coherence/mean": 1.3875,
535
+ "rewards/check_coherence/std": 0.18273502588272095,
536
+ "rewards/check_response_quality/mean": 2.5,
537
+ "rewards/check_response_quality/std": 0.0,
538
+ "rewards/match_format_approximately/mean": 1.0,
539
+ "rewards/match_format_approximately/std": 0.0,
540
+ "rewards/match_format_exactly/mean": 1.0,
541
+ "rewards/match_format_exactly/std": 0.0,
542
+ "step": 190
543
+ },
544
+ {
545
+ "completion_length": 37.9,
546
+ "completions/clipped_ratio": 0.0,
547
+ "completions/max_length": 112.8,
548
+ "completions/max_terminated_length": 112.8,
549
+ "completions/mean_length": 37.9,
550
+ "completions/mean_terminated_length": 37.9,
551
+ "completions/min_length": 8.1,
552
+ "completions/min_terminated_length": 8.1,
553
+ "epoch": 0.013753266400770183,
554
+ "frac_reward_zero_std": 0.4,
555
+ "grad_norm": 16.73357582092285,
556
+ "kl": 1.1858276724815369,
557
+ "learning_rate": 2.428571428571429e-06,
558
+ "loss": 0.0012,
559
+ "num_tokens": 344875.0,
560
+ "reward": 5.75,
561
+ "reward_std": 0.4457427144050598,
562
+ "rewards/check_coherence/mean": 1.35,
563
+ "rewards/check_coherence/std": 0.25773502588272096,
564
+ "rewards/check_response_quality/mean": 2.45,
565
+ "rewards/check_response_quality/std": 0.1,
566
+ "rewards/match_format_approximately/mean": 0.95,
567
+ "rewards/match_format_approximately/std": 0.1,
568
+ "rewards/match_format_exactly/mean": 1.0,
569
+ "rewards/match_format_exactly/std": 0.0,
570
+ "step": 200
571
+ },
572
+ {
573
+ "completion_length": 57.225,
574
+ "completions/clipped_ratio": 0.0,
575
+ "completions/max_length": 142.9,
576
+ "completions/max_terminated_length": 142.9,
577
+ "completions/mean_length": 57.225,
578
+ "completions/mean_terminated_length": 57.225,
579
+ "completions/min_length": 9.4,
580
+ "completions/min_terminated_length": 9.4,
581
+ "epoch": 0.014440929720808692,
582
+ "frac_reward_zero_std": 0.6,
583
+ "grad_norm": 0.006797971669584513,
584
+ "kl": 1.0238984107971192,
585
+ "learning_rate": 2.26984126984127e-06,
586
+ "loss": 0.001,
587
+ "num_tokens": 359308.0,
588
+ "reward": 5.7625,
589
+ "reward_std": 0.2946484684944153,
590
+ "rewards/check_coherence/mean": 1.375,
591
+ "rewards/check_coherence/std": 0.20773502588272094,
592
+ "rewards/check_response_quality/mean": 2.4375,
593
+ "rewards/check_response_quality/std": 0.06582483053207397,
594
+ "rewards/match_format_approximately/mean": 0.95,
595
+ "rewards/match_format_approximately/std": 0.05,
596
+ "rewards/match_format_exactly/mean": 1.0,
597
+ "rewards/match_format_exactly/std": 0.0,
598
+ "step": 210
599
+ },
600
+ {
601
+ "completion_length": 60.225,
602
+ "completions/clipped_ratio": 0.0,
603
+ "completions/max_length": 198.7,
604
+ "completions/max_terminated_length": 198.7,
605
+ "completions/mean_length": 60.225,
606
+ "completions/mean_terminated_length": 60.225,
607
+ "completions/min_length": 10.7,
608
+ "completions/min_terminated_length": 10.7,
609
+ "epoch": 0.015128593040847202,
610
+ "frac_reward_zero_std": 0.4,
611
+ "grad_norm": 1.2490910291671753,
612
+ "kl": 1.139722502231598,
613
+ "learning_rate": 2.1111111111111114e-06,
614
+ "loss": 0.0011,
615
+ "num_tokens": 372089.0,
616
+ "reward": 5.8,
617
+ "reward_std": 0.4,
618
+ "rewards/check_coherence/mean": 1.375,
619
+ "rewards/check_coherence/std": 0.25,
620
+ "rewards/check_response_quality/mean": 2.4625,
621
+ "rewards/check_response_quality/std": 0.075,
622
+ "rewards/match_format_approximately/mean": 0.9625,
623
+ "rewards/match_format_approximately/std": 0.075,
624
+ "rewards/match_format_exactly/mean": 1.0,
625
+ "rewards/match_format_exactly/std": 0.0,
626
+ "step": 220
627
+ },
628
+ {
629
+ "completion_length": 30.075,
630
+ "completions/clipped_ratio": 0.0,
631
+ "completions/max_length": 84.5,
632
+ "completions/max_terminated_length": 84.5,
633
+ "completions/mean_length": 30.075,
634
+ "completions/mean_terminated_length": 30.075,
635
+ "completions/min_length": 9.8,
636
+ "completions/min_terminated_length": 9.8,
637
+ "epoch": 0.01581625636088571,
638
+ "frac_reward_zero_std": 0.7,
639
+ "grad_norm": 0.004948398098349571,
640
+ "kl": 1.1260765612125396,
641
+ "learning_rate": 1.9523809523809527e-06,
642
+ "loss": 0.0011,
643
+ "num_tokens": 384276.0,
644
+ "reward": 5.875,
645
+ "reward_std": 0.1728713572025299,
646
+ "rewards/check_coherence/mean": 1.4375,
647
+ "rewards/check_coherence/std": 0.0978713572025299,
648
+ "rewards/check_response_quality/mean": 2.4625,
649
+ "rewards/check_response_quality/std": 0.075,
650
+ "rewards/match_format_approximately/mean": 0.975,
651
+ "rewards/match_format_approximately/std": 0.05,
652
+ "rewards/match_format_exactly/mean": 1.0,
653
+ "rewards/match_format_exactly/std": 0.0,
654
+ "step": 230
655
+ },
656
+ {
657
+ "completion_length": 65.25,
658
+ "completions/clipped_ratio": 0.025,
659
+ "completions/max_length": 223.1,
660
+ "completions/max_terminated_length": 65.9,
661
+ "completions/mean_length": 65.25,
662
+ "completions/mean_terminated_length": 25.908333396911623,
663
+ "completions/min_length": 9.1,
664
+ "completions/min_terminated_length": 9.1,
665
+ "epoch": 0.01650391968092422,
666
+ "frac_reward_zero_std": 0.6,
667
+ "grad_norm": 0.0049158609472215176,
668
+ "kl": 1.044507622718811,
669
+ "learning_rate": 1.7936507936507938e-06,
670
+ "loss": 0.001,
671
+ "num_tokens": 399686.0,
672
+ "reward": 5.8375,
673
+ "reward_std": 0.325,
674
+ "rewards/check_coherence/mean": 1.45,
675
+ "rewards/check_coherence/std": 0.1,
676
+ "rewards/check_response_quality/mean": 2.4375,
677
+ "rewards/check_response_quality/std": 0.125,
678
+ "rewards/match_format_approximately/mean": 0.95,
679
+ "rewards/match_format_approximately/std": 0.1,
680
+ "rewards/match_format_exactly/mean": 1.0,
681
+ "rewards/match_format_exactly/std": 0.0,
682
+ "step": 240
683
+ },
684
+ {
685
+ "completion_length": 34.25,
686
+ "completions/clipped_ratio": 0.0,
687
+ "completions/max_length": 97.6,
688
+ "completions/max_terminated_length": 97.6,
689
+ "completions/mean_length": 34.25,
690
+ "completions/mean_terminated_length": 34.25,
691
+ "completions/min_length": 9.6,
692
+ "completions/min_terminated_length": 9.6,
693
+ "epoch": 0.01719158300096273,
694
+ "frac_reward_zero_std": 0.6,
695
+ "grad_norm": 0.0035998751409351826,
696
+ "kl": 1.131579464673996,
697
+ "learning_rate": 1.6349206349206351e-06,
698
+ "loss": 0.0011,
699
+ "num_tokens": 412936.0,
700
+ "reward": 5.875,
701
+ "reward_std": 0.25,
702
+ "rewards/check_coherence/mean": 1.45,
703
+ "rewards/check_coherence/std": 0.1,
704
+ "rewards/check_response_quality/mean": 2.4625,
705
+ "rewards/check_response_quality/std": 0.075,
706
+ "rewards/match_format_approximately/mean": 0.9625,
707
+ "rewards/match_format_approximately/std": 0.075,
708
+ "rewards/match_format_exactly/mean": 1.0,
709
+ "rewards/match_format_exactly/std": 0.0,
710
+ "step": 250
711
+ },
712
+ {
713
+ "completion_length": 44.525,
714
+ "completions/clipped_ratio": 0.0,
715
+ "completions/max_length": 138.9,
716
+ "completions/max_terminated_length": 138.9,
717
+ "completions/mean_length": 44.525,
718
+ "completions/mean_terminated_length": 44.525,
719
+ "completions/min_length": 9.5,
720
+ "completions/min_terminated_length": 9.5,
721
+ "epoch": 0.017879246321001237,
722
+ "frac_reward_zero_std": 0.8,
723
+ "grad_norm": 0.003912392072379589,
724
+ "kl": 1.0597735822200776,
725
+ "learning_rate": 1.4761904761904762e-06,
726
+ "loss": 0.0011,
727
+ "num_tokens": 427709.0,
728
+ "reward": 5.9375,
729
+ "reward_std": 0.125,
730
+ "rewards/check_coherence/mean": 1.475,
731
+ "rewards/check_coherence/std": 0.05,
732
+ "rewards/check_response_quality/mean": 2.475,
733
+ "rewards/check_response_quality/std": 0.05,
734
+ "rewards/match_format_approximately/mean": 0.9875,
735
+ "rewards/match_format_approximately/std": 0.025,
736
+ "rewards/match_format_exactly/mean": 1.0,
737
+ "rewards/match_format_exactly/std": 0.0,
738
+ "step": 260
739
+ },
740
+ {
741
+ "completion_length": 13.175,
742
+ "completions/clipped_ratio": 0.0,
743
+ "completions/max_length": 22.9,
744
+ "completions/max_terminated_length": 22.9,
745
+ "completions/mean_length": 13.175,
746
+ "completions/mean_terminated_length": 13.175,
747
+ "completions/min_length": 7.3,
748
+ "completions/min_terminated_length": 7.3,
749
+ "epoch": 0.01856690964103975,
750
+ "frac_reward_zero_std": 0.6,
751
+ "grad_norm": 4.749101638793945,
752
+ "kl": 1.352827501296997,
753
+ "learning_rate": 1.3174603174603175e-06,
754
+ "loss": 0.0014,
755
+ "num_tokens": 437888.0,
756
+ "reward": 5.7625,
757
+ "reward_std": 0.41304759979248046,
758
+ "rewards/check_coherence/mean": 1.4125,
759
+ "rewards/check_coherence/std": 0.175,
760
+ "rewards/check_response_quality/mean": 2.4125,
761
+ "rewards/check_response_quality/std": 0.14464847445487977,
762
+ "rewards/match_format_approximately/mean": 0.9625,
763
+ "rewards/match_format_approximately/std": 0.053867512941360475,
764
+ "rewards/match_format_exactly/mean": 0.975,
765
+ "rewards/match_format_exactly/std": 0.05,
766
+ "step": 270
767
+ },
768
+ {
769
+ "completion_length": 29.05,
770
+ "completions/clipped_ratio": 0.0,
771
+ "completions/max_length": 75.4,
772
+ "completions/max_terminated_length": 75.4,
773
+ "completions/mean_length": 29.05,
774
+ "completions/mean_terminated_length": 29.05,
775
+ "completions/min_length": 9.3,
776
+ "completions/min_terminated_length": 9.3,
777
+ "epoch": 0.019254572961078256,
778
+ "frac_reward_zero_std": 0.7,
779
+ "grad_norm": 0.0032444808166474104,
780
+ "kl": 1.0743911743164063,
781
+ "learning_rate": 1.1587301587301589e-06,
782
+ "loss": 0.0011,
783
+ "num_tokens": 450566.0,
784
+ "reward": 5.8875,
785
+ "reward_std": 0.225,
786
+ "rewards/check_coherence/mean": 1.4375,
787
+ "rewards/check_coherence/std": 0.125,
788
+ "rewards/check_response_quality/mean": 2.475,
789
+ "rewards/check_response_quality/std": 0.05,
790
+ "rewards/match_format_approximately/mean": 0.975,
791
+ "rewards/match_format_approximately/std": 0.05,
792
+ "rewards/match_format_exactly/mean": 1.0,
793
+ "rewards/match_format_exactly/std": 0.0,
794
+ "step": 280
795
+ },
796
+ {
797
+ "completion_length": 35.0,
798
+ "completions/clipped_ratio": 0.0,
799
+ "completions/max_length": 100.7,
800
+ "completions/max_terminated_length": 100.7,
801
+ "completions/mean_length": 35.0,
802
+ "completions/mean_terminated_length": 35.0,
803
+ "completions/min_length": 9.7,
804
+ "completions/min_terminated_length": 9.7,
805
+ "epoch": 0.019942236281116764,
806
+ "frac_reward_zero_std": 0.7,
807
+ "grad_norm": 4.597572326660156,
808
+ "kl": 9.488562166690826,
809
+ "learning_rate": 1.0000000000000002e-06,
810
+ "loss": 0.0095,
811
+ "num_tokens": 462366.0,
812
+ "reward": 5.875,
813
+ "reward_std": 0.20773502588272094,
814
+ "rewards/check_coherence/mean": 1.4,
815
+ "rewards/check_coherence/std": 0.15773502588272095,
816
+ "rewards/check_response_quality/mean": 2.4875,
817
+ "rewards/check_response_quality/std": 0.025,
818
+ "rewards/match_format_approximately/mean": 0.9875,
819
+ "rewards/match_format_approximately/std": 0.025,
820
+ "rewards/match_format_exactly/mean": 1.0,
821
+ "rewards/match_format_exactly/std": 0.0,
822
+ "step": 290
823
+ },
824
+ {
825
+ "completion_length": 16.875,
826
+ "completions/clipped_ratio": 0.0,
827
+ "completions/max_length": 31.8,
828
+ "completions/max_terminated_length": 31.8,
829
+ "completions/mean_length": 16.875,
830
+ "completions/mean_terminated_length": 16.875,
831
+ "completions/min_length": 8.6,
832
+ "completions/min_terminated_length": 8.6,
833
+ "epoch": 0.020629899601155275,
834
+ "frac_reward_zero_std": 0.8,
835
+ "grad_norm": 0.0025285985320806503,
836
+ "kl": 1.0927811563014984,
837
+ "learning_rate": 8.412698412698414e-07,
838
+ "loss": 0.0011,
839
+ "num_tokens": 473501.0,
840
+ "reward": 5.9375,
841
+ "reward_std": 0.125,
842
+ "rewards/check_coherence/mean": 1.5,
843
+ "rewards/check_coherence/std": 0.0,
844
+ "rewards/check_response_quality/mean": 2.4625,
845
+ "rewards/check_response_quality/std": 0.075,
846
+ "rewards/match_format_approximately/mean": 0.975,
847
+ "rewards/match_format_approximately/std": 0.05,
848
+ "rewards/match_format_exactly/mean": 1.0,
849
+ "rewards/match_format_exactly/std": 0.0,
850
+ "step": 300
851
+ },
852
+ {
853
+ "completion_length": 34.825,
854
+ "completions/clipped_ratio": 0.0,
855
+ "completions/max_length": 104.8,
856
+ "completions/max_terminated_length": 104.8,
857
+ "completions/mean_length": 34.825,
858
+ "completions/mean_terminated_length": 34.825,
859
+ "completions/min_length": 9.6,
860
+ "completions/min_terminated_length": 9.6,
861
+ "epoch": 0.021317562921193783,
862
+ "frac_reward_zero_std": 0.9,
863
+ "grad_norm": 0.004729569889605045,
864
+ "kl": 1.0428565800189973,
865
+ "learning_rate": 6.825396825396826e-07,
866
+ "loss": 0.001,
867
+ "num_tokens": 487282.0,
868
+ "reward": 5.975,
869
+ "reward_std": 0.05,
870
+ "rewards/check_coherence/mean": 1.5,
871
+ "rewards/check_coherence/std": 0.0,
872
+ "rewards/check_response_quality/mean": 2.4875,
873
+ "rewards/check_response_quality/std": 0.025,
874
+ "rewards/match_format_approximately/mean": 0.9875,
875
+ "rewards/match_format_approximately/std": 0.025,
876
+ "rewards/match_format_exactly/mean": 1.0,
877
+ "rewards/match_format_exactly/std": 0.0,
878
+ "step": 310
879
+ },
880
+ {
881
+ "completion_length": 28.475,
882
+ "completions/clipped_ratio": 0.0,
883
+ "completions/max_length": 81.7,
884
+ "completions/max_terminated_length": 81.7,
885
+ "completions/mean_length": 28.475,
886
+ "completions/mean_terminated_length": 28.475,
887
+ "completions/min_length": 9.0,
888
+ "completions/min_terminated_length": 9.0,
889
+ "epoch": 0.022005226241232294,
890
+ "frac_reward_zero_std": 0.8,
891
+ "grad_norm": 6.385797500610352,
892
+ "kl": 1.2766858220100403,
893
+ "learning_rate": 5.238095238095239e-07,
894
+ "loss": 0.0013,
895
+ "num_tokens": 500121.0,
896
+ "reward": 5.925,
897
+ "reward_std": 0.15,
898
+ "rewards/check_coherence/mean": 1.45,
899
+ "rewards/check_coherence/std": 0.1,
900
+ "rewards/check_response_quality/mean": 2.4875,
901
+ "rewards/check_response_quality/std": 0.025,
902
+ "rewards/match_format_approximately/mean": 0.9875,
903
+ "rewards/match_format_approximately/std": 0.025,
904
+ "rewards/match_format_exactly/mean": 1.0,
905
+ "rewards/match_format_exactly/std": 0.0,
906
+ "step": 320
907
+ },
908
+ {
909
+ "completion_length": 18.75,
910
+ "completions/clipped_ratio": 0.0,
911
+ "completions/max_length": 33.8,
912
+ "completions/max_terminated_length": 33.8,
913
+ "completions/mean_length": 18.75,
914
+ "completions/mean_terminated_length": 18.75,
915
+ "completions/min_length": 8.4,
916
+ "completions/min_terminated_length": 8.4,
917
+ "epoch": 0.0226928895612708,
918
+ "frac_reward_zero_std": 0.7,
919
+ "grad_norm": 0.010602269321680069,
920
+ "kl": 1.1651100397109986,
921
+ "learning_rate": 3.6507936507936514e-07,
922
+ "loss": 0.0012,
923
+ "num_tokens": 514331.0,
924
+ "reward": 5.8875,
925
+ "reward_std": 0.18273502588272095,
926
+ "rewards/check_coherence/mean": 1.4625,
927
+ "rewards/check_coherence/std": 0.075,
928
+ "rewards/check_response_quality/mean": 2.4625,
929
+ "rewards/check_response_quality/std": 0.053867512941360475,
930
+ "rewards/match_format_approximately/mean": 0.9625,
931
+ "rewards/match_format_approximately/std": 0.053867512941360475,
932
+ "rewards/match_format_exactly/mean": 1.0,
933
+ "rewards/match_format_exactly/std": 0.0,
934
+ "step": 330
935
+ },
936
+ {
937
+ "completion_length": 13.8,
938
+ "completions/clipped_ratio": 0.0,
939
+ "completions/max_length": 20.7,
940
+ "completions/max_terminated_length": 20.7,
941
+ "completions/mean_length": 13.8,
942
+ "completions/mean_terminated_length": 13.8,
943
+ "completions/min_length": 8.9,
944
+ "completions/min_terminated_length": 8.9,
945
+ "epoch": 0.023380552881309313,
946
+ "frac_reward_zero_std": 0.9,
947
+ "grad_norm": 0.007794269360601902,
948
+ "kl": 1.1532993257045745,
949
+ "learning_rate": 2.0634920634920635e-07,
950
+ "loss": 0.0012,
951
+ "num_tokens": 528547.0,
952
+ "reward": 5.975,
953
+ "reward_std": 0.05,
954
+ "rewards/check_coherence/mean": 1.475,
955
+ "rewards/check_coherence/std": 0.05,
956
+ "rewards/check_response_quality/mean": 2.5,
957
+ "rewards/check_response_quality/std": 0.0,
958
+ "rewards/match_format_approximately/mean": 1.0,
959
+ "rewards/match_format_approximately/std": 0.0,
960
+ "rewards/match_format_exactly/mean": 1.0,
961
+ "rewards/match_format_exactly/std": 0.0,
962
+ "step": 340
963
+ },
964
+ {
965
+ "completion_length": 13.9,
966
+ "completions/clipped_ratio": 0.0,
967
+ "completions/max_length": 21.0,
968
+ "completions/max_terminated_length": 21.0,
969
+ "completions/mean_length": 13.9,
970
+ "completions/mean_terminated_length": 13.9,
971
+ "completions/min_length": 9.0,
972
+ "completions/min_terminated_length": 9.0,
973
+ "epoch": 0.02406821620134782,
974
+ "frac_reward_zero_std": 0.8,
975
+ "grad_norm": 3.0694658756256104,
976
+ "kl": 1.2572884202003478,
977
+ "learning_rate": 4.7619047619047627e-08,
978
+ "loss": 0.0013,
979
+ "num_tokens": 540139.0,
980
+ "reward": 5.95,
981
+ "reward_std": 0.1,
982
+ "rewards/check_coherence/mean": 1.45,
983
+ "rewards/check_coherence/std": 0.1,
984
+ "rewards/check_response_quality/mean": 2.5,
985
+ "rewards/check_response_quality/std": 0.0,
986
+ "rewards/match_format_approximately/mean": 1.0,
987
+ "rewards/match_format_approximately/std": 0.0,
988
+ "rewards/match_format_exactly/mean": 1.0,
989
+ "rewards/match_format_exactly/std": 0.0,
990
+ "step": 350
991
  }
992
  ],
993
  "logging_steps": 10,
994
+ "max_steps": 350,
995
+ "num_input_tokens_seen": 540139,
996
  "num_train_epochs": 1,
997
  "save_steps": 10,
998
  "stateful_callbacks": {
 
1002
  "should_evaluate": false,
1003
  "should_log": false,
1004
  "should_save": true,
1005
+ "should_training_stop": true
1006
  },
1007
  "attributes": {}
1008
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89640938646618221e1b1ed739226e1b8be20971bb4a902cd531ba9b4399d32e
3
  size 7313
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99dd8fd386f6c88582c8ee70c2a210ac339045a5b9e54e02f9fae15a6dfb2047
3
  size 7313