kmnis committed
Commit f9d366d · 1 Parent(s): 512d67f

Training in progress, step 1500, checkpoint

last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2104c69c76bb5f7498e20e536872f422f030978404b94d89a9de441010a5a69
+oid sha256:d564790f12170e8eb3426162f97f210d011c0052f93d3536794c5fb4e973a8ac
 size 19744138
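
All of the binary files in this commit are tracked with Git LFS, so each diff above and below only touches the three-line pointer (spec version, `oid sha256:...`, `size` in bytes); the new checkpoint payloads themselves live in LFS storage. A minimal sketch of checking a downloaded payload against its pointer, with hypothetical local paths (`adapter_model.pointer`, `adapter_model.bin`) standing in for whatever you fetch:

```python
import hashlib
from pathlib import Path


def parse_lfs_pointer(pointer_text: str) -> dict:
    """Split the 'key value' lines of a Git LFS pointer into a dict."""
    return dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())


def verify_lfs_payload(pointer_path: str, payload_path: str) -> bool:
    """Check a downloaded payload against the oid/size recorded in its pointer."""
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].removeprefix("sha256:")
    payload = Path(payload_path).read_bytes()
    return (len(payload) == int(fields["size"])
            and hashlib.sha256(payload).hexdigest() == expected_oid)


# Hypothetical paths; the repo itself only stores the pointer text shown above.
print(verify_lfs_payload("adapter_model.pointer", "adapter_model.bin"))
```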
last-checkpoint/global_step1500/zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e4fc2954cc72af548664d80104267712968d8f24c344635ae6129899c7970de
+oid sha256:ef325bd41fc084d20dadef801e072a4ac2d89af12588e1ca3d08bc70e3b0a93f
 size 29495149
last-checkpoint/global_step1500/zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73ef8e42b87dcf95560f1c316b545e4ec9f260b52ad4d66ba4ef8413401db1d0
+oid sha256:a5373c6d0b6d8090728a9568fc9cbbb4d8a8166d7d3931c1cd8fec675d9a5667
 size 29495149
last-checkpoint/global_step1500/zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b2a1de6b973813c41c7c5ebeefa2b87fd159b170d4caacbeca84db64d8344ae
+oid sha256:8c0985b989ac9aab8b36becd33d9a7052e786e5d499e97153b60837ce7713a5e
 size 29495149
last-checkpoint/global_step1500/zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2d5206daedb91f73077aecacd50e94e2d6ab8af8cc257e5bfbe899664c7e77e
+oid sha256:e157f34028cc9ae9977ed3b9ea517de8f0c6b55bbac619b5b3a332f52a9b4606
 size 29495149
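
The four `zero_pp_rank_{0..3}_mp_rank_00_optim_states.pt` shards are DeepSpeed ZeRO partitions, one per data-parallel rank, saved under the new `global_step1500` tag. If a single fp32 copy of the trained parameters is needed outside DeepSpeed, the shards can normally be consolidated with DeepSpeed's own helper; a sketch assuming a local copy of `last-checkpoint` and an installed `deepspeed` package:

```python
# Sketch only: consolidate the per-rank ZeRO shards under global_step1500
# into one fp32 state dict (paths follow the layout shown in this commit).
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint",       # directory holding `latest` and global_step1500/
    tag="global_step1500",   # which checkpoint tag to consolidate
)
torch.save(state_dict, "consolidated_fp32.bin")
print(f"consolidated {len(state_dict)} tensors")
```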
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step1000
+global_step1500
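
`latest` is a one-line tag file: DeepSpeed reads it to find the most recent `global_step*` directory, so this change points any resume at step 1500 instead of step 1000. With the Hugging Face `Trainer` that would typically be triggered via `trainer.train(resume_from_checkpoint="last-checkpoint")`; the tag itself can be inspected with a couple of lines (assuming a local copy of the folder):

```python
from pathlib import Path

checkpoint_dir = Path("last-checkpoint")        # assumed local copy of this folder
tag = (checkpoint_dir / "latest").read_text().strip()
print(tag)                                      # -> "global_step1500"

# The per-rank shards a resume would read back live under that tag directory.
for shard in sorted((checkpoint_dir / tag).glob("*_optim_states.pt")):
    print(shard.name)
```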
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e8728622af315db29b20e7895285cdcf395df663817cbd8286e8a542da77a9e
+oid sha256:aae6d6720b15d33c5be4514b3a84567730795fd13a72414380b33c8627313482
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abcaf5452298de0401ff6275854839dac435f28b7ef9e373a40036dc3a2d0391
+oid sha256:21210fc84cdeb9ead738b46e656c45624567b9d572c3ad0ea9fa169dff66448e
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95f0bdcdc9a519a9ba2d416e546cb553cfb5a5725027ca6027cc188c0e93d2da
+oid sha256:8694520fe08f7a3fe633f373a0dc945b74219cd9f27299648abfd33c17fb7442
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bf5603ebd4cae7ae6cdcd72b39a5bb65e2c71003884ab266de81eddf51ac20d
+oid sha256:20a9d6951e84f6da45adca2f04a48bf1d75088b7d2fe1a9040ff33c84bae9cfc
 size 15024
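
The `rng_state_{0..3}.pth` files hold each rank's Python, NumPy, CPU, and CUDA RNG states so that shuffling and dropout pick up where they left off after a resume. They are plain `torch.save` payloads and can be inspected directly; a small sketch (recent PyTorch versions need `weights_only=False` here because the dict contains non-tensor objects):

```python
import torch

# rng_state_0.pth is rank 0's copy; the other ranks follow the same layout.
rng_state = torch.load("rng_state_0.pth", weights_only=False)
print(sorted(rng_state.keys()))  # typically: cpu, cuda, numpy, python
```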
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.42158516020236086,
+  "epoch": 0.6323777403035413,
   "eval_steps": 500,
-  "global_step": 1000,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -607,13 +607,313 @@
       "learning_rate": 1e-05,
       "loss": 0.7154,
       "step": 1000
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 1e-05,
+      "loss": 0.7956,
+      "step": 1010
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 1e-05,
+      "loss": 0.7585,
+      "step": 1020
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 1e-05,
+      "loss": 0.6719,
+      "step": 1030
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 1e-05,
+      "loss": 0.7522,
+      "step": 1040
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 1e-05,
+      "loss": 0.6662,
+      "step": 1050
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 1e-05,
+      "loss": 0.7183,
+      "step": 1060
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 1e-05,
+      "loss": 0.7223,
+      "step": 1070
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 1e-05,
+      "loss": 0.7465,
+      "step": 1080
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 1e-05,
+      "loss": 0.7484,
+      "step": 1090
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 1e-05,
+      "loss": 0.6895,
+      "step": 1100
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 1e-05,
+      "loss": 0.6901,
+      "step": 1110
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 1e-05,
+      "loss": 0.722,
+      "step": 1120
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1e-05,
+      "loss": 0.7205,
+      "step": 1130
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1e-05,
+      "loss": 0.7023,
+      "step": 1140
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1e-05,
+      "loss": 0.7362,
+      "step": 1150
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 1e-05,
+      "loss": 0.7017,
+      "step": 1160
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 1e-05,
+      "loss": 0.7371,
+      "step": 1170
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 1e-05,
+      "loss": 0.6835,
+      "step": 1180
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 1e-05,
+      "loss": 0.6932,
+      "step": 1190
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 1e-05,
+      "loss": 0.709,
+      "step": 1200
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 1e-05,
+      "loss": 0.7183,
+      "step": 1210
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 1e-05,
+      "loss": 0.6728,
+      "step": 1220
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 1e-05,
+      "loss": 0.7082,
+      "step": 1230
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 1e-05,
+      "loss": 0.7386,
+      "step": 1240
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 1e-05,
+      "loss": 0.7759,
+      "step": 1250
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 1e-05,
+      "loss": 0.7074,
+      "step": 1260
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 1e-05,
+      "loss": 0.7434,
+      "step": 1270
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 1e-05,
+      "loss": 0.7182,
+      "step": 1280
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 1e-05,
+      "loss": 0.7363,
+      "step": 1290
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 1e-05,
+      "loss": 0.7298,
+      "step": 1300
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 1e-05,
+      "loss": 0.7573,
+      "step": 1310
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1e-05,
+      "loss": 0.6634,
+      "step": 1320
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1e-05,
+      "loss": 0.7064,
+      "step": 1330
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 1e-05,
+      "loss": 0.7203,
+      "step": 1340
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 1e-05,
+      "loss": 0.7016,
+      "step": 1350
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 1e-05,
+      "loss": 0.7063,
+      "step": 1360
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 1e-05,
+      "loss": 0.7035,
+      "step": 1370
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 1e-05,
+      "loss": 0.7432,
+      "step": 1380
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 1e-05,
+      "loss": 0.6509,
+      "step": 1390
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 1e-05,
+      "loss": 0.7226,
+      "step": 1400
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 1e-05,
+      "loss": 0.6995,
+      "step": 1410
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 1e-05,
+      "loss": 0.6947,
+      "step": 1420
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 1e-05,
+      "loss": 0.7199,
+      "step": 1430
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 1e-05,
+      "loss": 0.677,
+      "step": 1440
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 1e-05,
+      "loss": 0.6938,
+      "step": 1450
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 1e-05,
+      "loss": 0.7324,
+      "step": 1460
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 1e-05,
+      "loss": 0.6413,
+      "step": 1470
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 1e-05,
+      "loss": 0.7271,
+      "step": 1480
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 1e-05,
+      "loss": 0.685,
+      "step": 1490
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 1e-05,
+      "loss": 0.7034,
+      "step": 1500
     }
   ],
   "logging_steps": 10,
   "max_steps": 5000,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 251028716912640.0,
+  "total_flos": 376920751472640.0,
   "trial_name": null,
   "trial_params": null
 }
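
The new `log_history` entries show the loss drifting between roughly 0.64 and 0.80 at a constant learning rate of 1e-05 from step 1010 through step 1500, with the run now at epoch ≈ 0.63 of a planned 3 epochs / 5000 steps. A short sketch for pulling those numbers back out of `trainer_state.json` (assuming a local copy of the checkpoint):

```python
import json
from pathlib import Path

state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())
print(state["global_step"], state["epoch"])     # 1500 0.6323777403035413

# Each log_history entry mirrors one of the objects added in the diff above.
for entry in state["log_history"]:
    if entry.get("step", 0) > 1450:
        print(entry["step"], entry["loss"], entry["learning_rate"])
```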