biggy-smiley commited on
Commit
89349ff
·
verified ·
1 Parent(s): 953832f

Training in progress, step 13500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92965ce174a5a7f2cd7cc2a9458e927278fbe87e49f06e7f80e19898f1ec8065
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edd68aebab71b92a6ed1b6722fe165759ebf83a538eb2271c50c9f7d34b9310a
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0cedd25416214cd9a75c7b887bbb7f5f8da59d9275014384fada95a849e2376
3
  size 876185914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cadcda0e477d2446a66b7f972b749a231e78f5c08da90e2a860fe7f51cc5c671
3
  size 876185914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d3d464249f4a39ec225bcb2a558b6eb72607957e90d74d6a2882277e87903cf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ae56aa2daceb1c35b80f86e740f89fafa1aeedd31c9afb3852c0bad57bceb6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d9049a24c9e94cbf3030b6058909a52a72de351c864ad3381bc5ac9def102ff
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d894bb4a51f57aaa20006b8fe3b25869340284fd903307bc0e17589d747cb184
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.5564497113227844,
3
  "best_model_checkpoint": "/kaggle/working/results/checkpoint-11500",
4
- "epoch": 0.8252601363473269,
5
  "eval_steps": 500,
6
- "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -352,6 +352,66 @@
352
  "eval_samples_per_second": 68.365,
353
  "eval_steps_per_second": 0.539,
354
  "step": 11500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
  ],
357
  "logging_steps": 500,
@@ -371,7 +431,7 @@
371
  "attributes": {}
372
  }
373
  },
374
- "total_flos": 9.6845732806656e+16,
375
  "train_batch_size": 32,
376
  "trial_name": null,
377
  "trial_params": null
 
1
  {
2
  "best_metric": 0.5564497113227844,
3
  "best_model_checkpoint": "/kaggle/working/results/checkpoint-11500",
4
+ "epoch": 0.9687836383207751,
5
  "eval_steps": 500,
6
+ "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
352
  "eval_samples_per_second": 68.365,
353
  "eval_steps_per_second": 0.539,
354
  "step": 11500
355
+ },
356
+ {
357
+ "epoch": 0.8611410118406889,
358
+ "grad_norm": 5.504430294036865,
359
+ "learning_rate": 5.554359526372443e-06,
360
+ "loss": 0.4743,
361
+ "step": 12000
362
+ },
363
+ {
364
+ "epoch": 0.8611410118406889,
365
+ "eval_loss": 0.5606986284255981,
366
+ "eval_runtime": 152.3774,
367
+ "eval_samples_per_second": 68.252,
368
+ "eval_steps_per_second": 0.538,
369
+ "step": 12000
370
+ },
371
+ {
372
+ "epoch": 0.897021887334051,
373
+ "grad_norm": 4.161441326141357,
374
+ "learning_rate": 4.119124506637962e-06,
375
+ "loss": 0.4842,
376
+ "step": 12500
377
+ },
378
+ {
379
+ "epoch": 0.897021887334051,
380
+ "eval_loss": 0.5657362937927246,
381
+ "eval_runtime": 152.0558,
382
+ "eval_samples_per_second": 68.396,
383
+ "eval_steps_per_second": 0.539,
384
+ "step": 12500
385
+ },
386
+ {
387
+ "epoch": 0.932902762827413,
388
+ "grad_norm": 15.681989669799805,
389
+ "learning_rate": 2.6838894869034805e-06,
390
+ "loss": 0.4583,
391
+ "step": 13000
392
+ },
393
+ {
394
+ "epoch": 0.932902762827413,
395
+ "eval_loss": 0.5640388131141663,
396
+ "eval_runtime": 152.0718,
397
+ "eval_samples_per_second": 68.389,
398
+ "eval_steps_per_second": 0.539,
399
+ "step": 13000
400
+ },
401
+ {
402
+ "epoch": 0.9687836383207751,
403
+ "grad_norm": 6.109396934509277,
404
+ "learning_rate": 1.248654467168999e-06,
405
+ "loss": 0.4662,
406
+ "step": 13500
407
+ },
408
+ {
409
+ "epoch": 0.9687836383207751,
410
+ "eval_loss": 0.5629301071166992,
411
+ "eval_runtime": 152.1551,
412
+ "eval_samples_per_second": 68.351,
413
+ "eval_steps_per_second": 0.539,
414
+ "step": 13500
415
  }
416
  ],
417
  "logging_steps": 500,
 
431
  "attributes": {}
432
  }
433
  },
434
+ "total_flos": 1.13688468946944e+17,
435
  "train_batch_size": 32,
436
  "trial_name": null,
437
  "trial_params": null