SpideyDLK commited on
Commit
dec9569
·
verified ·
1 Parent(s): 4629b0d

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61f07e30ba6c3e4d46423b96f0cbcbd82106b6bb256c1c12e811925d96d026b5
3
  size 1262135480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8815656f39bb31a27997a0b4985228221e357c0c54a99006d4f5c2dab0368d2
3
  size 1262135480
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67dae5c921c1d0e3ec0ef2ca745ddb83ae850f785c418cb799e8d39e9977effb
3
  size 2490815798
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1bc25a2e77b3cb67994f8b8ed9943ee4caad68cba1eae5f0dab4aaa607f87db
3
  size 2490815798
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aed632af97046c1419a3b3bfb31bf5b26a7fe13c4e9fee7877e803a6d9b9ab78
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae3e06391b56f7d5d884accce0258dfa3afdca721551b70c01b265975815b61
3
+ size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed90542fa85abfeb1fc1d8ba1917d59ae062284951ce0a75c17787d5046ce253
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0385e4a12dd5228a18991bc2f050826728def0ff0eed46203f29e5046dc0c739
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.297150610583447,
5
  "eval_steps": 400,
6
- "global_step": 19600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -791,6 +791,22 @@
791
  "eval_steps_per_second": 0.831,
792
  "eval_wer": 0.06208596518256047,
793
  "step": 19600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
  }
795
  ],
796
  "logging_steps": 400,
@@ -798,7 +814,7 @@
798
  "num_input_tokens_seen": 0,
799
  "num_train_epochs": 30,
800
  "save_steps": 400,
801
- "total_flos": 5.978374840640582e+19,
802
  "train_batch_size": 8,
803
  "trial_name": null,
804
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.568521031207599,
5
  "eval_steps": 400,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
791
  "eval_steps_per_second": 0.831,
792
  "eval_wer": 0.06208596518256047,
793
  "step": 19600
794
+ },
795
+ {
796
+ "epoch": 13.57,
797
+ "grad_norm": 0.8884561657905579,
798
+ "learning_rate": 0.00016619396157365048,
799
+ "loss": 0.0773,
800
+ "step": 20000
801
+ },
802
+ {
803
+ "epoch": 13.57,
804
+ "eval_loss": 0.032973628491163254,
805
+ "eval_runtime": 210.0131,
806
+ "eval_samples_per_second": 6.661,
807
+ "eval_steps_per_second": 0.833,
808
+ "eval_wer": 0.05646279463873055,
809
+ "step": 20000
810
  }
811
  ],
812
  "logging_steps": 400,
 
814
  "num_input_tokens_seen": 0,
815
  "num_train_epochs": 30,
816
  "save_steps": 400,
817
+ "total_flos": 6.100475557233166e+19,
818
  "train_batch_size": 8,
819
  "trial_name": null,
820
  "trial_params": null