bobox committed on
Commit
282808c
1 Parent(s): 3455691

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -909,6 +909,36 @@ You can finetune this model on your own dataset.
909
  | 1.8915 | 5300 | 2.0528 | - | - | - |
910
  | 1.9293 | 5406 | 2.4979 | - | - | - |
911
  | 1.9672 | 5512 | 2.6698 | - | - | - |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
912
 
913
 
914
  ### Framework Versions
 
909
  | 1.8915 | 5300 | 2.0528 | - | - | - |
910
  | 1.9293 | 5406 | 2.4979 | - | - | - |
911
  | 1.9672 | 5512 | 2.6698 | - | - | - |
912
+ | 2.0050 | 5618 | 3.2147 | - | - | - |
913
+ | 2.0428 | 5724 | 2.4885 | - | - | - |
914
+ | 2.0807 | 5830 | 2.5061 | - | - | - |
915
+ | 2.1010 | 5887 | - | 1.4211 | 2.3481 | 1.8698 |
916
+ | 2.1185 | 5936 | 2.285 | - | - | - |
917
+ | 2.1563 | 6042 | 2.6148 | - | - | - |
918
+ | 2.1941 | 6148 | 2.4811 | - | - | - |
919
+ | 2.2320 | 6254 | 2.0681 | - | - | - |
920
+ | 2.2698 | 6360 | 2.4426 | - | - | - |
921
+ | 2.3076 | 6466 | 2.5273 | - | - | - |
922
+ | 2.3455 | 6572 | 2.1097 | - | - | - |
923
+ | 2.3833 | 6678 | 2.8945 | - | - | - |
924
+ | 2.4011 | 6728 | - | 1.3394 | 2.6094 | 1.8919 |
925
+ | 2.4211 | 6784 | 2.2264 | - | - | - |
926
+ | 2.4590 | 6890 | 2.5986 | - | - | - |
927
+ | 2.4968 | 6996 | 2.3359 | - | - | - |
928
+ | 2.5346 | 7102 | 1.857 | - | - | - |
929
+ | 2.5724 | 7208 | 2.0381 | - | - | - |
930
+ | 2.6103 | 7314 | 2.0267 | - | - | - |
931
+ | 2.6481 | 7420 | 2.0914 | - | - | - |
932
+ | 2.6859 | 7526 | 1.9207 | - | - | - |
933
+ | 2.7013 | 7569 | - | 1.2556 | 2.2631 | 1.7135 |
934
+ | 2.7238 | 7632 | 2.034 | - | - | - |
935
+ | 2.7616 | 7738 | 2.2729 | - | - | - |
936
+ | 2.7994 | 7844 | 1.936 | - | - | - |
937
+ | 2.8373 | 7950 | 2.1102 | - | - | - |
938
+ | 2.8751 | 8056 | 1.6607 | - | - | - |
939
+ | 2.9129 | 8162 | 1.9579 | - | - | - |
940
+ | 2.9507 | 8268 | 2.4587 | - | - | - |
941
+ | 2.9886 | 8374 | 2.78 | - | - | - |
942
 
943
 
944
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edcfac60467b256c4c8f68601cc6c5ae38d750345174166da1d889b872dbcd64
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9cebfb9a7e7e64a849e1dc8b049ade66359af20bc5e5427afbd57e05b40900
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711f813160ab0e9cd3dbca675a0ac617410914b0dd060a6859db1aaf61b2404c
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34180c7a5cfade436f788a1c7db68ba2b2b8f1b0ab5e9c255945f7eb66fc4b5
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fbc34d5d7c6517bafd202f5246c2c4b0fe06364263d4bb83bef5ecdb3fa3a13
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bb4a8ca87963bbe30445f59452f5ad16501c0976db609eeadd04b352fae8489
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79e2948f16fd3b5f540a15bf56e60f3e359bb2424054d6c5347e0b39494d3f7d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a7cf6f0bbd2aeaecb5ee82eab7e9d52a1b4ca5b1640948aa8f69d7015a851c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 841,
6
- "global_step": 5604,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -515,6 +515,267 @@
515
  "learning_rate": 6.3209786741211076e-06,
516
  "loss": 2.6698,
517
  "step": 5512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
  }
519
  ],
520
  "logging_steps": 106,
@@ -529,7 +790,7 @@
529
  "should_evaluate": false,
530
  "should_log": false,
531
  "should_save": true,
532
- "should_training_stop": false
533
  },
534
  "attributes": {}
535
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 841,
6
+ "global_step": 8406,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
515
  "learning_rate": 6.3209786741211076e-06,
516
  "loss": 2.6698,
517
  "step": 5512
518
+ },
519
+ {
520
+ "epoch": 2.0049964311206283,
521
+ "grad_norm": 25.162227630615234,
522
+ "learning_rate": 4.89963760685911e-06,
523
+ "loss": 3.2147,
524
+ "step": 5618
525
+ },
526
+ {
527
+ "epoch": 2.0428265524625266,
528
+ "grad_norm": 5.108881950378418,
529
+ "learning_rate": 3.60612086314475e-06,
530
+ "loss": 2.4885,
531
+ "step": 5724
532
+ },
533
+ {
534
+ "epoch": 2.0806566738044254,
535
+ "grad_norm": 18.789485931396484,
536
+ "learning_rate": 2.4728463164579917e-06,
537
+ "loss": 2.5061,
538
+ "step": 5830
539
+ },
540
+ {
541
+ "epoch": 2.1009992862241256,
542
+ "eval_nli-pairs_loss": 1.8698406219482422,
543
+ "eval_nli-pairs_runtime": 6.3174,
544
+ "eval_nli-pairs_samples_per_second": 118.72,
545
+ "eval_nli-pairs_steps_per_second": 6.648,
546
+ "step": 5887
547
+ },
548
+ {
549
+ "epoch": 2.1009992862241256,
550
+ "eval_scitail-pairs-pos_loss": 1.4210681915283203,
551
+ "eval_scitail-pairs-pos_runtime": 7.5787,
552
+ "eval_scitail-pairs-pos_samples_per_second": 98.961,
553
+ "eval_scitail-pairs-pos_steps_per_second": 5.542,
554
+ "step": 5887
555
+ },
556
+ {
557
+ "epoch": 2.1009992862241256,
558
+ "eval_qnli-contrastive_loss": 2.348076105117798,
559
+ "eval_qnli-contrastive_runtime": 2.4643,
560
+ "eval_qnli-contrastive_samples_per_second": 304.346,
561
+ "eval_qnli-contrastive_steps_per_second": 17.043,
562
+ "step": 5887
563
+ },
564
+ {
565
+ "epoch": 2.118486795146324,
566
+ "grad_norm": 24.064834594726562,
567
+ "learning_rate": 1.5282158804586934e-06,
568
+ "loss": 2.285,
569
+ "step": 5936
570
+ },
571
+ {
572
+ "epoch": 2.1563169164882225,
573
+ "grad_norm": 17.475284576416016,
574
+ "learning_rate": 7.95903705565364e-07,
575
+ "loss": 2.6148,
576
+ "step": 6042
577
+ },
578
+ {
579
+ "epoch": 2.1941470378301213,
580
+ "grad_norm": 30.337942123413086,
581
+ "learning_rate": 2.942628618475518e-07,
582
+ "loss": 2.4811,
583
+ "step": 6148
584
+ },
585
+ {
586
+ "epoch": 2.23197715917202,
587
+ "grad_norm": 39.4644775390625,
588
+ "learning_rate": 3.586537783372812e-08,
589
+ "loss": 2.0681,
590
+ "step": 6254
591
+ },
592
+ {
593
+ "epoch": 2.2698072805139184,
594
+ "grad_norm": 23.357696533203125,
595
+ "learning_rate": 1.9972812837304454e-05,
596
+ "loss": 2.4426,
597
+ "step": 6360
598
+ },
599
+ {
600
+ "epoch": 2.3076374018558172,
601
+ "grad_norm": 45.85642623901367,
602
+ "learning_rate": 1.973498424932629e-05,
603
+ "loss": 2.5273,
604
+ "step": 6466
605
+ },
606
+ {
607
+ "epoch": 2.345467523197716,
608
+ "grad_norm": 5.410664081573486,
609
+ "learning_rate": 1.925208877931819e-05,
610
+ "loss": 2.1097,
611
+ "step": 6572
612
+ },
613
+ {
614
+ "epoch": 2.3832976445396143,
615
+ "grad_norm": 69.4088134765625,
616
+ "learning_rate": 1.8537319199499876e-05,
617
+ "loss": 2.8945,
618
+ "step": 6678
619
+ },
620
+ {
621
+ "epoch": 2.401142041399001,
622
+ "eval_nli-pairs_loss": 1.8918577432632446,
623
+ "eval_nli-pairs_runtime": 6.2206,
624
+ "eval_nli-pairs_samples_per_second": 120.567,
625
+ "eval_nli-pairs_steps_per_second": 6.752,
626
+ "step": 6728
627
+ },
628
+ {
629
+ "epoch": 2.401142041399001,
630
+ "eval_scitail-pairs-pos_loss": 1.3393586874008179,
631
+ "eval_scitail-pairs-pos_runtime": 7.6528,
632
+ "eval_scitail-pairs-pos_samples_per_second": 98.003,
633
+ "eval_scitail-pairs-pos_steps_per_second": 5.488,
634
+ "step": 6728
635
+ },
636
+ {
637
+ "epoch": 2.401142041399001,
638
+ "eval_qnli-contrastive_loss": 2.6094088554382324,
639
+ "eval_qnli-contrastive_runtime": 2.479,
640
+ "eval_qnli-contrastive_samples_per_second": 302.543,
641
+ "eval_qnli-contrastive_steps_per_second": 16.942,
642
+ "step": 6728
643
+ },
644
+ {
645
+ "epoch": 2.421127765881513,
646
+ "grad_norm": 16.983625411987305,
647
+ "learning_rate": 1.760858893072075e-05,
648
+ "loss": 2.2264,
649
+ "step": 6784
650
+ },
651
+ {
652
+ "epoch": 2.458957887223412,
653
+ "grad_norm": 32.24399948120117,
654
+ "learning_rate": 1.6489173636397294e-05,
655
+ "loss": 2.5986,
656
+ "step": 6890
657
+ },
658
+ {
659
+ "epoch": 2.4967880085653107,
660
+ "grad_norm": 27.440216064453125,
661
+ "learning_rate": 1.5207127892211873e-05,
662
+ "loss": 2.3359,
663
+ "step": 6996
664
+ },
665
+ {
666
+ "epoch": 2.534618129907209,
667
+ "grad_norm": 5.935887813568115,
668
+ "learning_rate": 1.3794582087610502e-05,
669
+ "loss": 1.857,
670
+ "step": 7102
671
+ },
672
+ {
673
+ "epoch": 2.572448251249108,
674
+ "grad_norm": 11.255595207214355,
675
+ "learning_rate": 1.228693718002246e-05,
676
+ "loss": 2.0381,
677
+ "step": 7208
678
+ },
679
+ {
680
+ "epoch": 2.6102783725910066,
681
+ "grad_norm": 20.442119598388672,
682
+ "learning_rate": 1.072197748271959e-05,
683
+ "loss": 2.0267,
684
+ "step": 7314
685
+ },
686
+ {
687
+ "epoch": 2.648108493932905,
688
+ "grad_norm": 16.118242263793945,
689
+ "learning_rate": 9.138923721459728e-06,
690
+ "loss": 2.0914,
691
+ "step": 7420
692
+ },
693
+ {
694
+ "epoch": 2.6859386152748037,
695
+ "grad_norm": 4.989729881286621,
696
+ "learning_rate": 7.577450092030369e-06,
697
+ "loss": 1.9207,
698
+ "step": 7526
699
+ },
700
+ {
701
+ "epoch": 2.7012847965738755,
702
+ "eval_nli-pairs_loss": 1.7135257720947266,
703
+ "eval_nli-pairs_runtime": 6.2852,
704
+ "eval_nli-pairs_samples_per_second": 119.329,
705
+ "eval_nli-pairs_steps_per_second": 6.682,
706
+ "step": 7569
707
+ },
708
+ {
709
+ "epoch": 2.7012847965738755,
710
+ "eval_scitail-pairs-pos_loss": 1.2556273937225342,
711
+ "eval_scitail-pairs-pos_runtime": 7.7379,
712
+ "eval_scitail-pairs-pos_samples_per_second": 96.926,
713
+ "eval_scitail-pairs-pos_steps_per_second": 5.428,
714
+ "step": 7569
715
+ },
716
+ {
717
+ "epoch": 2.7012847965738755,
718
+ "eval_qnli-contrastive_loss": 2.2631449699401855,
719
+ "eval_qnli-contrastive_runtime": 2.4746,
720
+ "eval_qnli-contrastive_samples_per_second": 303.074,
721
+ "eval_qnli-contrastive_steps_per_second": 16.972,
722
+ "step": 7569
723
+ },
724
+ {
725
+ "epoch": 2.7237687366167025,
726
+ "grad_norm": 18.30848503112793,
727
+ "learning_rate": 6.076689953011623e-06,
728
+ "loss": 2.034,
729
+ "step": 7632
730
+ },
731
+ {
732
+ "epoch": 2.7615988579586013,
733
+ "grad_norm": 6.111113548278809,
734
+ "learning_rate": 4.674255072899656e-06,
735
+ "loss": 2.2729,
736
+ "step": 7738
737
+ },
738
+ {
739
+ "epoch": 2.7994289793004996,
740
+ "grad_norm": 24.842626571655273,
741
+ "learning_rate": 3.405293011034734e-06,
742
+ "loss": 1.936,
743
+ "step": 7844
744
+ },
745
+ {
746
+ "epoch": 2.8372591006423984,
747
+ "grad_norm": 35.68688201904297,
748
+ "learning_rate": 2.30160625607605e-06,
749
+ "loss": 2.1102,
750
+ "step": 7950
751
+ },
752
+ {
753
+ "epoch": 2.8750892219842967,
754
+ "grad_norm": 2.0094401836395264,
755
+ "learning_rate": 1.3908551980089656e-06,
756
+ "loss": 1.6607,
757
+ "step": 8056
758
+ },
759
+ {
760
+ "epoch": 2.9129193433261955,
761
+ "grad_norm": 27.046804428100586,
762
+ "learning_rate": 6.958649086507996e-07,
763
+ "loss": 1.9579,
764
+ "step": 8162
765
+ },
766
+ {
767
+ "epoch": 2.9507494646680943,
768
+ "grad_norm": 5.009235858917236,
769
+ "learning_rate": 2.340531039914673e-07,
770
+ "loss": 2.4587,
771
+ "step": 8268
772
+ },
773
+ {
774
+ "epoch": 2.988579586009993,
775
+ "grad_norm": 19.040987014770508,
776
+ "learning_rate": 1.69936246694824e-08,
777
+ "loss": 2.78,
778
+ "step": 8374
779
  }
780
  ],
781
  "logging_steps": 106,
 
790
  "should_evaluate": false,
791
  "should_log": false,
792
  "should_save": true,
793
+ "should_training_stop": true
794
  },
795
  "attributes": {}
796
  }