marcel committed on
Commit
aa5a032
1 Parent(s): 93bbfa0
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +101 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:644e8bd2b69b37fca5754768f276d194935d131c7de9be692538bd6b5e6fb479
3
  size 2490339591
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc817f3687769e04e797f4a2d65e705085e218a0e605bfbc439368268116963
3
  size 2490339591
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de31c8911196c0ecac7af2e29267430caa4c7f19fac913f7fd19f959c8790000
3
  size 1262065048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9fe9ece7c8d596406cb8184e772cd600ac8713a89bd8b7cfcd2b5fcf1aef922
3
  size 1262065048
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aed5cf7ebeae316ea13263624242bfab7943ae4e78766c20314f6d796611af6e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b8d971c35266efc366e4ed2b2f0e0bd8cccd0cc38cab1592331781a3b1c85ea
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.055951169888097,
5
- "global_step": 14800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -524,11 +524,109 @@
524
  "eval_samples_per_second": 7.977,
525
  "eval_wer": 0.3517199017199017,
526
  "step": 14800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  }
528
  ],
529
  "max_steps": 29490,
530
  "num_train_epochs": 30,
531
- "total_flos": 8.21317833556874e+19,
532
  "trial_name": null,
533
  "trial_params": null
534
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.904374364191252,
5
+ "global_step": 17600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
524
  "eval_samples_per_second": 7.977,
525
  "eval_wer": 0.3517199017199017,
526
  "step": 14800
527
+ },
528
+ {
529
+ "epoch": 15.46,
530
+ "learning_rate": 0.00014787857882028285,
531
+ "loss": 0.0717,
532
+ "step": 15200
533
+ },
534
+ {
535
+ "epoch": 15.46,
536
+ "eval_loss": 0.49670103192329407,
537
+ "eval_runtime": 236.0843,
538
+ "eval_samples_per_second": 7.925,
539
+ "eval_wer": 0.3600737100737101,
540
+ "step": 15200
541
+ },
542
+ {
543
+ "epoch": 15.87,
544
+ "learning_rate": 0.00014373922042083476,
545
+ "loss": 0.0708,
546
+ "step": 15600
547
+ },
548
+ {
549
+ "epoch": 15.87,
550
+ "eval_loss": 0.46058785915374756,
551
+ "eval_runtime": 242.6156,
552
+ "eval_samples_per_second": 7.712,
553
+ "eval_wer": 0.35982800982800983,
554
+ "step": 15600
555
+ },
556
+ {
557
+ "epoch": 16.28,
558
+ "learning_rate": 0.00013959986202138666,
559
+ "loss": 0.0673,
560
+ "step": 16000
561
+ },
562
+ {
563
+ "epoch": 16.28,
564
+ "eval_loss": 0.45084264874458313,
565
+ "eval_runtime": 234.0195,
566
+ "eval_samples_per_second": 7.995,
567
+ "eval_wer": 0.3546683046683047,
568
+ "step": 16000
569
+ },
570
+ {
571
+ "epoch": 16.68,
572
+ "learning_rate": 0.0001354605036219386,
573
+ "loss": 0.0664,
574
+ "step": 16400
575
+ },
576
+ {
577
+ "epoch": 16.68,
578
+ "eval_loss": 0.4838450849056244,
579
+ "eval_runtime": 237.8079,
580
+ "eval_samples_per_second": 7.868,
581
+ "eval_wer": 0.35614250614250614,
582
+ "step": 16400
583
+ },
584
+ {
585
+ "epoch": 17.09,
586
+ "learning_rate": 0.0001313211452224905,
587
+ "loss": 0.0639,
588
+ "step": 16800
589
+ },
590
+ {
591
+ "epoch": 17.09,
592
+ "eval_loss": 0.4703587591648102,
593
+ "eval_runtime": 235.9394,
594
+ "eval_samples_per_second": 7.93,
595
+ "eval_wer": 0.35657248157248156,
596
+ "step": 16800
597
+ },
598
+ {
599
+ "epoch": 17.5,
600
+ "learning_rate": 0.0001271817868230424,
601
+ "loss": 0.0597,
602
+ "step": 17200
603
+ },
604
+ {
605
+ "epoch": 17.5,
606
+ "eval_loss": 0.47286155819892883,
607
+ "eval_runtime": 237.4364,
608
+ "eval_samples_per_second": 7.88,
609
+ "eval_wer": 0.35657248157248156,
610
+ "step": 17200
611
+ },
612
+ {
613
+ "epoch": 17.9,
614
+ "learning_rate": 0.00012304242842359434,
615
+ "loss": 0.068,
616
+ "step": 17600
617
+ },
618
+ {
619
+ "epoch": 17.9,
620
+ "eval_loss": 0.42778506875038147,
621
+ "eval_runtime": 235.8032,
622
+ "eval_samples_per_second": 7.935,
623
+ "eval_wer": 0.3484029484029484,
624
+ "step": 17600
625
  }
626
  ],
627
  "max_steps": 29490,
628
  "num_train_epochs": 30,
629
+ "total_flos": 9.766671614435893e+19,
630
  "trial_name": null,
631
  "trial_params": null
632
  }