fats-fme committed
Commit fef5f7a · verified · 1 parent: a073efb

Training in progress, step 36, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a86ba9385bb768d29deab79937c811ef79a601d619335c9e94af62c9f11709ce
+oid sha256:c3161d89e2ad1b44a18bf418bfb17aa903d17835c252c09465b64747372bac3f
 size 70667778
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff1de991a5e8b028670d79166d941671fcf1f558f0044b08eaf5154d066f0048
+oid sha256:06ab5129ab155330a39a9371aacd2f73f3677218096c1c34b3766d995f841a91
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a5995616407d184ecedacf74ac31cd0731474c7e2323f03405d89945bad93ff
+oid sha256:81435bd0a6a3789b553c69a79f8c384151f9688d0cd7d745e20e5aadeece4761
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b375860c66674ec9637bb873a1ff676ec525ee5502840d2f1d554d4237b8f0c4
+oid sha256:844a9e8b65ea1c19a7e6553433121a4f8c699c309e7900990529efa7ada6321b
 size 1064
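The checkpoint tensors above (optimizer.pt, rng_state_*.pth, scheduler.pt) are tracked with Git LFS, so each diff only touches the three-line pointer file: spec version, sha256 oid, and byte size; the binary blobs themselves live in LFS storage. A minimal sketch of verifying a locally downloaded blob against such a pointer, using only the Python standard library (the file paths are hypothetical and not part of this commit):

```python
import hashlib
from pathlib import Path

def read_lfs_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid and size recorded in its pointer."""
    pointer = read_lfs_pointer(pointer_path)
    expected_oid = pointer["oid"].removeprefix("sha256:")
    expected_size = int(pointer["size"])
    data = Path(blob_path).read_bytes()
    return (
        len(data) == expected_size
        and hashlib.sha256(data).hexdigest() == expected_oid
    )

# Hypothetical local paths; the repository itself stores only the pointer text shown above.
print(verify_blob("optimizer.pt.pointer", "optimizer.pt"))
```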
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.512,
+  "epoch": 0.768,
   "eval_steps": 12,
-  "global_step": 24,
+  "global_step": 36,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -199,6 +199,98 @@
       "eval_samples_per_second": 21.061,
       "eval_steps_per_second": 5.332,
       "step": 24
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": NaN,
+      "learning_rate": 5e-05,
+      "loss": 0.0,
+      "step": 25
+    },
+    {
+      "epoch": 0.5546666666666666,
+      "grad_norm": NaN,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.0,
+      "step": 26
+    },
+    {
+      "epoch": 0.576,
+      "grad_norm": NaN,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 0.5973333333333334,
+      "grad_norm": NaN,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 0.6186666666666667,
+      "grad_norm": NaN,
+      "learning_rate": 5.8e-05,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": NaN,
+      "learning_rate": 6e-05,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.6613333333333333,
+      "grad_norm": NaN,
+      "learning_rate": 6.2e-05,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 0.6826666666666666,
+      "grad_norm": NaN,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 0.704,
+      "grad_norm": NaN,
+      "learning_rate": 6.6e-05,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 0.7253333333333334,
+      "grad_norm": NaN,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 0.7466666666666667,
+      "grad_norm": NaN,
+      "learning_rate": 7e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": NaN,
+      "learning_rate": 7.2e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 0.768,
+      "eval_loss": NaN,
+      "eval_runtime": 3.6511,
+      "eval_samples_per_second": 21.637,
+      "eval_steps_per_second": 5.478,
+      "step": 36
     }
   ],
   "logging_steps": 1,
@@ -218,7 +310,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3460580643962880.0,
+  "total_flos": 5190870965944320.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null