JulienRPA commited on
Commit
f92bdc3
1 Parent(s): 76aa74c

Training in progress, step 15000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62b9e0c6cb076e90d989fceca45a6f04ff44219f1d244b7aa0253efb4df6f91d
3
  size 1987250795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e88223f691161e3ffd4564e693f314c328a9dda83c6b6102570c77aacceb5b7
3
  size 1987250795
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98c4a4b485878383c35f3ddf1b6b6d3df97d84af3cfe747974b0ccdb1ceddd6b
3
  size 996026489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9b0c8d9009664c4300b90d08bc7562ffcb54fad5cddd58a67988a9614811c9
3
  size 996026489
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40d1365c3f6a8ff89daca7ecc77aad972e19d826a31739e1520b525b82b87480
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4859bbb7f1b84791b7031a22e16d3ec69d1279cc060d7fd0e413b65e6970cdf9
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e271050f83a2cc06d190381bb7b858ec954f8745b3e89afccdd097156cff852
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:779622c08e70491b7a953485f3890d1c81acb5e7c732a61a899ff0473fd0cfdf
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.149377593360996,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -666,11 +666,335 @@
666
  "eval_samples_per_second": 1.052,
667
  "eval_steps_per_second": 0.147,
668
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
669
  }
670
  ],
671
  "max_steps": 24100,
672
  "num_train_epochs": 10,
673
- "total_flos": 2904681923357184.0,
674
  "trial_name": null,
675
  "trial_params": null
676
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.224066390041494,
5
+ "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
666
  "eval_samples_per_second": 1.052,
667
  "eval_steps_per_second": 0.147,
668
  "step": 10000
669
+ },
670
+ {
671
+ "epoch": 4.19,
672
+ "learning_rate": 3.240740740740741e-05,
673
+ "loss": 1.1396,
674
+ "step": 10100
675
+ },
676
+ {
677
+ "epoch": 4.23,
678
+ "learning_rate": 3.217592592592593e-05,
679
+ "loss": 1.1749,
680
+ "step": 10200
681
+ },
682
+ {
683
+ "epoch": 4.27,
684
+ "learning_rate": 3.194444444444444e-05,
685
+ "loss": 1.1098,
686
+ "step": 10300
687
+ },
688
+ {
689
+ "epoch": 4.32,
690
+ "learning_rate": 3.171296296296297e-05,
691
+ "loss": 1.0741,
692
+ "step": 10400
693
+ },
694
+ {
695
+ "epoch": 4.36,
696
+ "learning_rate": 3.148148148148148e-05,
697
+ "loss": 1.1388,
698
+ "step": 10500
699
+ },
700
+ {
701
+ "epoch": 4.4,
702
+ "learning_rate": 3.125e-05,
703
+ "loss": 1.0168,
704
+ "step": 10600
705
+ },
706
+ {
707
+ "epoch": 4.44,
708
+ "learning_rate": 3.101851851851852e-05,
709
+ "loss": 1.0862,
710
+ "step": 10700
711
+ },
712
+ {
713
+ "epoch": 4.48,
714
+ "learning_rate": 3.0787037037037034e-05,
715
+ "loss": 1.0689,
716
+ "step": 10800
717
+ },
718
+ {
719
+ "epoch": 4.52,
720
+ "learning_rate": 3.055555555555556e-05,
721
+ "loss": 1.0804,
722
+ "step": 10900
723
+ },
724
+ {
725
+ "epoch": 4.56,
726
+ "learning_rate": 3.0324074074074077e-05,
727
+ "loss": 1.0312,
728
+ "step": 11000
729
+ },
730
+ {
731
+ "epoch": 4.61,
732
+ "learning_rate": 3.0092592592592593e-05,
733
+ "loss": 1.0184,
734
+ "step": 11100
735
+ },
736
+ {
737
+ "epoch": 4.65,
738
+ "learning_rate": 2.9861111111111113e-05,
739
+ "loss": 0.992,
740
+ "step": 11200
741
+ },
742
+ {
743
+ "epoch": 4.69,
744
+ "learning_rate": 2.962962962962963e-05,
745
+ "loss": 1.0191,
746
+ "step": 11300
747
+ },
748
+ {
749
+ "epoch": 4.73,
750
+ "learning_rate": 2.9398148148148146e-05,
751
+ "loss": 1.0842,
752
+ "step": 11400
753
+ },
754
+ {
755
+ "epoch": 4.77,
756
+ "learning_rate": 2.916666666666667e-05,
757
+ "loss": 1.0508,
758
+ "step": 11500
759
+ },
760
+ {
761
+ "epoch": 4.81,
762
+ "learning_rate": 2.8935185185185186e-05,
763
+ "loss": 0.9815,
764
+ "step": 11600
765
+ },
766
+ {
767
+ "epoch": 4.85,
768
+ "learning_rate": 2.8703703703703706e-05,
769
+ "loss": 0.9645,
770
+ "step": 11700
771
+ },
772
+ {
773
+ "epoch": 4.9,
774
+ "learning_rate": 2.8472222222222223e-05,
775
+ "loss": 0.9826,
776
+ "step": 11800
777
+ },
778
+ {
779
+ "epoch": 4.94,
780
+ "learning_rate": 2.824074074074074e-05,
781
+ "loss": 1.0036,
782
+ "step": 11900
783
+ },
784
+ {
785
+ "epoch": 4.98,
786
+ "learning_rate": 2.8009259259259263e-05,
787
+ "loss": 0.9994,
788
+ "step": 12000
789
+ },
790
+ {
791
+ "epoch": 4.98,
792
+ "eval_bleu": 68.0869,
793
+ "eval_em": 0.02,
794
+ "eval_gen_len": 47.76,
795
+ "eval_loss": 1.0576136112213135,
796
+ "eval_rm": 0.8889,
797
+ "eval_runtime": 48.2259,
798
+ "eval_samples_per_second": 1.037,
799
+ "eval_steps_per_second": 0.145,
800
+ "step": 12000
801
+ },
802
+ {
803
+ "epoch": 5.02,
804
+ "learning_rate": 2.777777777777778e-05,
805
+ "loss": 0.8445,
806
+ "step": 12100
807
+ },
808
+ {
809
+ "epoch": 5.06,
810
+ "learning_rate": 2.75462962962963e-05,
811
+ "loss": 0.833,
812
+ "step": 12200
813
+ },
814
+ {
815
+ "epoch": 5.1,
816
+ "learning_rate": 2.7314814814814816e-05,
817
+ "loss": 0.8034,
818
+ "step": 12300
819
+ },
820
+ {
821
+ "epoch": 5.15,
822
+ "learning_rate": 2.7083333333333332e-05,
823
+ "loss": 0.8154,
824
+ "step": 12400
825
+ },
826
+ {
827
+ "epoch": 5.19,
828
+ "learning_rate": 2.6851851851851855e-05,
829
+ "loss": 0.8026,
830
+ "step": 12500
831
+ },
832
+ {
833
+ "epoch": 5.23,
834
+ "learning_rate": 2.6620370370370372e-05,
835
+ "loss": 0.8147,
836
+ "step": 12600
837
+ },
838
+ {
839
+ "epoch": 5.27,
840
+ "learning_rate": 2.6388888888888892e-05,
841
+ "loss": 0.8466,
842
+ "step": 12700
843
+ },
844
+ {
845
+ "epoch": 5.31,
846
+ "learning_rate": 2.615740740740741e-05,
847
+ "loss": 0.7881,
848
+ "step": 12800
849
+ },
850
+ {
851
+ "epoch": 5.35,
852
+ "learning_rate": 2.5925925925925925e-05,
853
+ "loss": 0.7856,
854
+ "step": 12900
855
+ },
856
+ {
857
+ "epoch": 5.39,
858
+ "learning_rate": 2.5694444444444445e-05,
859
+ "loss": 0.8058,
860
+ "step": 13000
861
+ },
862
+ {
863
+ "epoch": 5.44,
864
+ "learning_rate": 2.5462962962962965e-05,
865
+ "loss": 0.7704,
866
+ "step": 13100
867
+ },
868
+ {
869
+ "epoch": 5.48,
870
+ "learning_rate": 2.5231481481481485e-05,
871
+ "loss": 0.7572,
872
+ "step": 13200
873
+ },
874
+ {
875
+ "epoch": 5.52,
876
+ "learning_rate": 2.5e-05,
877
+ "loss": 0.7757,
878
+ "step": 13300
879
+ },
880
+ {
881
+ "epoch": 5.56,
882
+ "learning_rate": 2.4768518518518518e-05,
883
+ "loss": 0.7598,
884
+ "step": 13400
885
+ },
886
+ {
887
+ "epoch": 5.6,
888
+ "learning_rate": 2.4537037037037038e-05,
889
+ "loss": 0.7268,
890
+ "step": 13500
891
+ },
892
+ {
893
+ "epoch": 5.64,
894
+ "learning_rate": 2.4305555555555558e-05,
895
+ "loss": 0.7455,
896
+ "step": 13600
897
+ },
898
+ {
899
+ "epoch": 5.68,
900
+ "learning_rate": 2.4074074074074074e-05,
901
+ "loss": 0.7525,
902
+ "step": 13700
903
+ },
904
+ {
905
+ "epoch": 5.73,
906
+ "learning_rate": 2.3842592592592594e-05,
907
+ "loss": 0.7205,
908
+ "step": 13800
909
+ },
910
+ {
911
+ "epoch": 5.77,
912
+ "learning_rate": 2.361111111111111e-05,
913
+ "loss": 0.7741,
914
+ "step": 13900
915
+ },
916
+ {
917
+ "epoch": 5.81,
918
+ "learning_rate": 2.337962962962963e-05,
919
+ "loss": 0.7275,
920
+ "step": 14000
921
+ },
922
+ {
923
+ "epoch": 5.81,
924
+ "eval_bleu": 74.1032,
925
+ "eval_em": 0.02,
926
+ "eval_gen_len": 46.52,
927
+ "eval_loss": 0.952226996421814,
928
+ "eval_rm": 0.9556,
929
+ "eval_runtime": 45.6891,
930
+ "eval_samples_per_second": 1.094,
931
+ "eval_steps_per_second": 0.153,
932
+ "step": 14000
933
+ },
934
+ {
935
+ "epoch": 5.85,
936
+ "learning_rate": 2.314814814814815e-05,
937
+ "loss": 0.7429,
938
+ "step": 14100
939
+ },
940
+ {
941
+ "epoch": 5.89,
942
+ "learning_rate": 2.2916666666666667e-05,
943
+ "loss": 0.7032,
944
+ "step": 14200
945
+ },
946
+ {
947
+ "epoch": 5.93,
948
+ "learning_rate": 2.2685185185185187e-05,
949
+ "loss": 0.749,
950
+ "step": 14300
951
+ },
952
+ {
953
+ "epoch": 5.98,
954
+ "learning_rate": 2.2453703703703703e-05,
955
+ "loss": 0.7122,
956
+ "step": 14400
957
+ },
958
+ {
959
+ "epoch": 6.02,
960
+ "learning_rate": 2.2222222222222223e-05,
961
+ "loss": 0.6559,
962
+ "step": 14500
963
+ },
964
+ {
965
+ "epoch": 6.06,
966
+ "learning_rate": 2.1990740740740743e-05,
967
+ "loss": 0.5757,
968
+ "step": 14600
969
+ },
970
+ {
971
+ "epoch": 6.1,
972
+ "learning_rate": 2.175925925925926e-05,
973
+ "loss": 0.5756,
974
+ "step": 14700
975
+ },
976
+ {
977
+ "epoch": 6.14,
978
+ "learning_rate": 2.152777777777778e-05,
979
+ "loss": 0.5777,
980
+ "step": 14800
981
+ },
982
+ {
983
+ "epoch": 6.18,
984
+ "learning_rate": 2.1296296296296296e-05,
985
+ "loss": 0.5904,
986
+ "step": 14900
987
+ },
988
+ {
989
+ "epoch": 6.22,
990
+ "learning_rate": 2.1064814814814816e-05,
991
+ "loss": 0.5798,
992
+ "step": 15000
993
  }
994
  ],
995
  "max_steps": 24100,
996
  "num_train_epochs": 10,
997
+ "total_flos": 4374137384877888.0,
998
  "trial_name": null,
999
  "trial_params": null
1000
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98c4a4b485878383c35f3ddf1b6b6d3df97d84af3cfe747974b0ccdb1ceddd6b
3
  size 996026489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9b0c8d9009664c4300b90d08bc7562ffcb54fad5cddd58a67988a9614811c9
3
  size 996026489
runs/May23_09-05-35_dca52c8e2827/events.out.tfevents.1684832750.dca52c8e2827.29730.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90f5524e866aea4ce07ef1d143c8d4d838adfafa3318f85e0e75b3258204b4e2
3
- size 26552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6530118d38d0e4689416570db5f5431f217aa7f1e59a0e5e60315427d4623079
3
+ size 35326