marinone94 committed on
Commit
0062eb5
‒
1 Parent(s): e0ccb33

Training in progress, step 1000

Browse files
{checkpoint-700 → checkpoint-1000}/config.json RENAMED
File without changes
{checkpoint-700 → checkpoint-1000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:069822e7cb50437bf3b0c3c681ba2097aca5275abda255e4fc6af68096387a0f
3
  size 2490346001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7366ea0f0c13b52774f5b65c9237727cabcae8df11565fda3d45477aa2f04998
3
  size 2490346001
{checkpoint-700 → checkpoint-1000}/preprocessor_config.json RENAMED
File without changes
{checkpoint-700 → checkpoint-1000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f82fd4ef9d1ee16be752146f67271d9079ee4c70b3fc0deb5e49afca834a8366
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215e82f2b95ddb6481797f61b17faad7070953e16644208286be9592449527ca
3
  size 1262067185
{checkpoint-700 → checkpoint-1000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87ff61a351af84ca56c488c49a3cc628f0fcf141b18f2d6a7cc51cc4bc0d60cf
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a99d24035e5ff3cbd9dc315b818c70efc6bc64689f3aea3ad3aeea51446c2e6
3
  size 14567
{checkpoint-700 → checkpoint-1000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f78925f079cf346f5de839091ad9ea905df589af85f400d7d4ba825eb420db33
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5445ebb608f848fc4858bff0aee1c5e183c4aaa9123cfc6cd096717ff96db5
3
  size 559
{checkpoint-700 → checkpoint-1000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8642bc4d6c53b743cbfe106c9335222647650f89ec52d95e575d8e1f87f4f2be
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51077c0775f8e63b91513c5020d22be0afef1f97e1846c4b1d969f7f83bbe0d8
3
  size 623
{checkpoint-700 → checkpoint-1000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3229527104959631,
5
- "global_step": 700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -489,11 +489,218 @@
489
  "eval_steps_per_second": 1.12,
490
  "eval_wer": 0.9999694058618369,
491
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  }
493
  ],
494
  "max_steps": 6501,
495
  "num_train_epochs": 3,
496
- "total_flos": 1.3228060515474647e+19,
497
  "trial_name": null,
498
  "trial_params": null
499
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.461361014994233,
5
+ "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
489
  "eval_steps_per_second": 1.12,
490
  "eval_wer": 0.9999694058618369,
491
  "step": 700
492
+ },
493
+ {
494
+ "epoch": 0.33,
495
+ "learning_rate": 6.838304552590267e-05,
496
+ "loss": 2.9139,
497
+ "step": 710
498
+ },
499
+ {
500
+ "epoch": 0.33,
501
+ "learning_rate": 6.826530612244897e-05,
502
+ "loss": 2.9957,
503
+ "step": 720
504
+ },
505
+ {
506
+ "epoch": 0.34,
507
+ "learning_rate": 6.814756671899528e-05,
508
+ "loss": 2.9842,
509
+ "step": 730
510
+ },
511
+ {
512
+ "epoch": 0.34,
513
+ "learning_rate": 6.802982731554159e-05,
514
+ "loss": 3.2804,
515
+ "step": 740
516
+ },
517
+ {
518
+ "epoch": 0.35,
519
+ "learning_rate": 6.792386185243327e-05,
520
+ "loss": 3.4944,
521
+ "step": 750
522
+ },
523
+ {
524
+ "epoch": 0.35,
525
+ "learning_rate": 6.780612244897958e-05,
526
+ "loss": 2.9905,
527
+ "step": 760
528
+ },
529
+ {
530
+ "epoch": 0.36,
531
+ "learning_rate": 6.768838304552589e-05,
532
+ "loss": 2.9692,
533
+ "step": 770
534
+ },
535
+ {
536
+ "epoch": 0.36,
537
+ "learning_rate": 6.757064364207221e-05,
538
+ "loss": 2.9747,
539
+ "step": 780
540
+ },
541
+ {
542
+ "epoch": 0.36,
543
+ "learning_rate": 6.745290423861852e-05,
544
+ "loss": 3.294,
545
+ "step": 790
546
+ },
547
+ {
548
+ "epoch": 0.37,
549
+ "learning_rate": 6.73469387755102e-05,
550
+ "loss": 3.5998,
551
+ "step": 800
552
+ },
553
+ {
554
+ "epoch": 0.37,
555
+ "eval_loss": 3.059178590774536,
556
+ "eval_runtime": 129.2337,
557
+ "eval_samples_per_second": 35.749,
558
+ "eval_steps_per_second": 1.122,
559
+ "eval_wer": 1.0,
560
+ "step": 800
561
+ },
562
+ {
563
+ "epoch": 0.37,
564
+ "learning_rate": 6.72291993720565e-05,
565
+ "loss": 2.9902,
566
+ "step": 810
567
+ },
568
+ {
569
+ "epoch": 0.38,
570
+ "learning_rate": 6.711145996860281e-05,
571
+ "loss": 2.9644,
572
+ "step": 820
573
+ },
574
+ {
575
+ "epoch": 0.38,
576
+ "learning_rate": 6.699372056514913e-05,
577
+ "loss": 2.9529,
578
+ "step": 830
579
+ },
580
+ {
581
+ "epoch": 0.39,
582
+ "learning_rate": 6.687598116169544e-05,
583
+ "loss": 3.2737,
584
+ "step": 840
585
+ },
586
+ {
587
+ "epoch": 0.39,
588
+ "learning_rate": 6.677001569858712e-05,
589
+ "loss": 3.6534,
590
+ "step": 850
591
+ },
592
+ {
593
+ "epoch": 0.4,
594
+ "learning_rate": 6.665227629513343e-05,
595
+ "loss": 2.9664,
596
+ "step": 860
597
+ },
598
+ {
599
+ "epoch": 0.4,
600
+ "learning_rate": 6.653453689167974e-05,
601
+ "loss": 2.9463,
602
+ "step": 870
603
+ },
604
+ {
605
+ "epoch": 0.41,
606
+ "learning_rate": 6.641679748822606e-05,
607
+ "loss": 2.9591,
608
+ "step": 880
609
+ },
610
+ {
611
+ "epoch": 0.41,
612
+ "learning_rate": 6.629905808477237e-05,
613
+ "loss": 3.2701,
614
+ "step": 890
615
+ },
616
+ {
617
+ "epoch": 0.42,
618
+ "learning_rate": 6.618131868131867e-05,
619
+ "loss": 3.3784,
620
+ "step": 900
621
+ },
622
+ {
623
+ "epoch": 0.42,
624
+ "eval_loss": 3.0334482192993164,
625
+ "eval_runtime": 129.072,
626
+ "eval_samples_per_second": 35.794,
627
+ "eval_steps_per_second": 1.123,
628
+ "eval_wer": 1.0,
629
+ "step": 900
630
+ },
631
+ {
632
+ "epoch": 0.42,
633
+ "learning_rate": 6.606357927786498e-05,
634
+ "loss": 2.9729,
635
+ "step": 910
636
+ },
637
+ {
638
+ "epoch": 0.42,
639
+ "learning_rate": 6.594583987441129e-05,
640
+ "loss": 2.9444,
641
+ "step": 920
642
+ },
643
+ {
644
+ "epoch": 0.43,
645
+ "learning_rate": 6.582810047095761e-05,
646
+ "loss": 2.9441,
647
+ "step": 930
648
+ },
649
+ {
650
+ "epoch": 0.43,
651
+ "learning_rate": 6.571036106750392e-05,
652
+ "loss": 3.2629,
653
+ "step": 940
654
+ },
655
+ {
656
+ "epoch": 0.44,
657
+ "learning_rate": 6.56043956043956e-05,
658
+ "loss": 3.6467,
659
+ "step": 950
660
+ },
661
+ {
662
+ "epoch": 0.44,
663
+ "learning_rate": 6.548665620094191e-05,
664
+ "loss": 2.9472,
665
+ "step": 960
666
+ },
667
+ {
668
+ "epoch": 0.45,
669
+ "learning_rate": 6.536891679748822e-05,
670
+ "loss": 2.9301,
671
+ "step": 970
672
+ },
673
+ {
674
+ "epoch": 0.45,
675
+ "learning_rate": 6.525117739403452e-05,
676
+ "loss": 2.9555,
677
+ "step": 980
678
+ },
679
+ {
680
+ "epoch": 0.46,
681
+ "learning_rate": 6.513343799058084e-05,
682
+ "loss": 3.2888,
683
+ "step": 990
684
+ },
685
+ {
686
+ "epoch": 0.46,
687
+ "learning_rate": 6.502747252747253e-05,
688
+ "loss": 3.6354,
689
+ "step": 1000
690
+ },
691
+ {
692
+ "epoch": 0.46,
693
+ "eval_loss": 2.9995181560516357,
694
+ "eval_runtime": 128.998,
695
+ "eval_samples_per_second": 35.815,
696
+ "eval_steps_per_second": 1.124,
697
+ "eval_wer": 1.0,
698
+ "step": 1000
699
  }
700
  ],
701
  "max_steps": 6501,
702
  "num_train_epochs": 3,
703
+ "total_flos": 1.8908999903639437e+19,
704
  "trial_name": null,
705
  "trial_params": null
706
  }
{checkpoint-700 → checkpoint-1000}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03c4ad43954ff5d68a16a7e40b86626a429d98e2fdeccfd745f3e2d0f37d6ea5
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215e82f2b95ddb6481797f61b17faad7070953e16644208286be9592449527ca
3
  size 1262067185