navjordj commited on
Commit
b1add90
1 Parent(s): 0f0588d

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +7 -7
  3. train_results.json +5 -5
  4. trainer_state.json +482 -14
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_bleu": 34.2122,
4
- "eval_gen_len": 65.0263,
5
- "eval_loss": 0.7057932019233704,
6
- "eval_runtime": 3541.072,
7
  "eval_samples": 12422,
8
- "eval_samples_per_second": 3.508,
9
- "eval_steps_per_second": 0.219,
10
- "train_loss": 1.0872868923270456,
11
- "train_runtime": 30832.4334,
12
  "train_samples": 62107,
13
- "train_samples_per_second": 6.043,
14
- "train_steps_per_second": 0.378
15
  }
1
  {
2
+ "epoch": 5.0,
3
+ "eval_bleu": 36.7184,
4
+ "eval_gen_len": 64.6249,
5
+ "eval_loss": 0.6334519386291504,
6
+ "eval_runtime": 3498.0158,
7
  "eval_samples": 12422,
8
+ "eval_samples_per_second": 3.551,
9
+ "eval_steps_per_second": 0.222,
10
+ "train_loss": 0.3320594994939522,
11
+ "train_runtime": 31593.7732,
12
  "train_samples": 62107,
13
+ "train_samples_per_second": 9.829,
14
+ "train_steps_per_second": 0.614
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_bleu": 34.2122,
4
- "eval_gen_len": 65.0263,
5
- "eval_loss": 0.7057932019233704,
6
- "eval_runtime": 3541.072,
7
  "eval_samples": 12422,
8
- "eval_samples_per_second": 3.508,
9
- "eval_steps_per_second": 0.219
10
  }
1
  {
2
+ "epoch": 5.0,
3
+ "eval_bleu": 36.7184,
4
+ "eval_gen_len": 64.6249,
5
+ "eval_loss": 0.6334519386291504,
6
+ "eval_runtime": 3498.0158,
7
  "eval_samples": 12422,
8
+ "eval_samples_per_second": 3.551,
9
+ "eval_steps_per_second": 0.222
10
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 1.0872868923270456,
4
- "train_runtime": 30832.4334,
5
  "train_samples": 62107,
6
- "train_samples_per_second": 6.043,
7
- "train_steps_per_second": 0.378
8
  }
1
  {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.3320594994939522,
4
+ "train_runtime": 31593.7732,
5
  "train_samples": 62107,
6
+ "train_samples_per_second": 9.829,
7
+ "train_steps_per_second": 0.614
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 11646,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -699,23 +699,491 @@
699
  },
700
  {
701
  "epoch": 2.99,
702
- "learning_rate": 1.97492701356689e-07,
703
- "loss": 0.881,
704
  "step": 11600
705
  },
706
  {
707
- "epoch": 3.0,
708
- "step": 11646,
709
- "total_flos": 6.280174860167578e+16,
710
- "train_loss": 1.0872868923270456,
711
- "train_runtime": 30832.4334,
712
- "train_samples_per_second": 6.043,
713
- "train_steps_per_second": 0.378
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  }
715
  ],
716
- "max_steps": 11646,
717
- "num_train_epochs": 3,
718
- "total_flos": 6.280174860167578e+16,
719
  "trial_name": null,
720
  "trial_params": null
721
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 19410,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
699
  },
700
  {
701
  "epoch": 2.99,
702
+ "learning_rate": 2.0118495620814015e-05,
703
+ "loss": 0.8823,
704
  "step": 11600
705
  },
706
  {
707
+ "epoch": 3.01,
708
+ "learning_rate": 1.9860896445131376e-05,
709
+ "loss": 0.8469,
710
+ "step": 11700
711
+ },
712
+ {
713
+ "epoch": 3.04,
714
+ "learning_rate": 1.960329726944874e-05,
715
+ "loss": 0.8762,
716
+ "step": 11800
717
+ },
718
+ {
719
+ "epoch": 3.07,
720
+ "learning_rate": 1.93456980937661e-05,
721
+ "loss": 0.8434,
722
+ "step": 11900
723
+ },
724
+ {
725
+ "epoch": 3.09,
726
+ "learning_rate": 1.9088098918083462e-05,
727
+ "loss": 0.856,
728
+ "step": 12000
729
+ },
730
+ {
731
+ "epoch": 3.12,
732
+ "learning_rate": 1.8830499742400827e-05,
733
+ "loss": 0.8453,
734
+ "step": 12100
735
+ },
736
+ {
737
+ "epoch": 3.14,
738
+ "learning_rate": 1.8572900566718188e-05,
739
+ "loss": 0.8447,
740
+ "step": 12200
741
+ },
742
+ {
743
+ "epoch": 3.17,
744
+ "learning_rate": 1.831530139103555e-05,
745
+ "loss": 0.8621,
746
+ "step": 12300
747
+ },
748
+ {
749
+ "epoch": 3.19,
750
+ "learning_rate": 1.8057702215352913e-05,
751
+ "loss": 0.8375,
752
+ "step": 12400
753
+ },
754
+ {
755
+ "epoch": 3.22,
756
+ "learning_rate": 1.7800103039670274e-05,
757
+ "loss": 0.8614,
758
+ "step": 12500
759
+ },
760
+ {
761
+ "epoch": 3.25,
762
+ "learning_rate": 1.7542503863987635e-05,
763
+ "loss": 0.8449,
764
+ "step": 12600
765
+ },
766
+ {
767
+ "epoch": 3.27,
768
+ "learning_rate": 1.7284904688305e-05,
769
+ "loss": 0.8578,
770
+ "step": 12700
771
+ },
772
+ {
773
+ "epoch": 3.3,
774
+ "learning_rate": 1.702730551262236e-05,
775
+ "loss": 0.8627,
776
+ "step": 12800
777
+ },
778
+ {
779
+ "epoch": 3.32,
780
+ "learning_rate": 1.676970633693972e-05,
781
+ "loss": 0.8396,
782
+ "step": 12900
783
+ },
784
+ {
785
+ "epoch": 3.35,
786
+ "learning_rate": 1.6512107161257085e-05,
787
+ "loss": 0.8391,
788
+ "step": 13000
789
+ },
790
+ {
791
+ "epoch": 3.37,
792
+ "learning_rate": 1.6254507985574446e-05,
793
+ "loss": 0.8475,
794
+ "step": 13100
795
+ },
796
+ {
797
+ "epoch": 3.4,
798
+ "learning_rate": 1.5996908809891807e-05,
799
+ "loss": 0.8225,
800
+ "step": 13200
801
+ },
802
+ {
803
+ "epoch": 3.43,
804
+ "learning_rate": 1.573930963420917e-05,
805
+ "loss": 0.8395,
806
+ "step": 13300
807
+ },
808
+ {
809
+ "epoch": 3.45,
810
+ "learning_rate": 1.5481710458526533e-05,
811
+ "loss": 0.8366,
812
+ "step": 13400
813
+ },
814
+ {
815
+ "epoch": 3.48,
816
+ "learning_rate": 1.5224111282843895e-05,
817
+ "loss": 0.8371,
818
+ "step": 13500
819
+ },
820
+ {
821
+ "epoch": 3.5,
822
+ "learning_rate": 1.4966512107161256e-05,
823
+ "loss": 0.8294,
824
+ "step": 13600
825
+ },
826
+ {
827
+ "epoch": 3.53,
828
+ "learning_rate": 1.4708912931478619e-05,
829
+ "loss": 0.83,
830
+ "step": 13700
831
+ },
832
+ {
833
+ "epoch": 3.55,
834
+ "learning_rate": 1.4451313755795981e-05,
835
+ "loss": 0.8357,
836
+ "step": 13800
837
+ },
838
+ {
839
+ "epoch": 3.58,
840
+ "learning_rate": 1.4193714580113342e-05,
841
+ "loss": 0.8355,
842
+ "step": 13900
843
+ },
844
+ {
845
+ "epoch": 3.61,
846
+ "learning_rate": 1.3936115404430705e-05,
847
+ "loss": 0.8592,
848
+ "step": 14000
849
+ },
850
+ {
851
+ "epoch": 3.63,
852
+ "learning_rate": 1.367851622874807e-05,
853
+ "loss": 0.811,
854
+ "step": 14100
855
+ },
856
+ {
857
+ "epoch": 3.66,
858
+ "learning_rate": 1.3420917053065432e-05,
859
+ "loss": 0.8269,
860
+ "step": 14200
861
+ },
862
+ {
863
+ "epoch": 3.68,
864
+ "learning_rate": 1.3163317877382795e-05,
865
+ "loss": 0.8447,
866
+ "step": 14300
867
+ },
868
+ {
869
+ "epoch": 3.71,
870
+ "learning_rate": 1.2905718701700156e-05,
871
+ "loss": 0.8463,
872
+ "step": 14400
873
+ },
874
+ {
875
+ "epoch": 3.74,
876
+ "learning_rate": 1.2648119526017518e-05,
877
+ "loss": 0.8257,
878
+ "step": 14500
879
+ },
880
+ {
881
+ "epoch": 3.76,
882
+ "learning_rate": 1.239052035033488e-05,
883
+ "loss": 0.8134,
884
+ "step": 14600
885
+ },
886
+ {
887
+ "epoch": 3.79,
888
+ "learning_rate": 1.2132921174652242e-05,
889
+ "loss": 0.826,
890
+ "step": 14700
891
+ },
892
+ {
893
+ "epoch": 3.81,
894
+ "learning_rate": 1.1875321998969603e-05,
895
+ "loss": 0.8358,
896
+ "step": 14800
897
+ },
898
+ {
899
+ "epoch": 3.84,
900
+ "learning_rate": 1.1617722823286967e-05,
901
+ "loss": 0.8237,
902
+ "step": 14900
903
+ },
904
+ {
905
+ "epoch": 3.86,
906
+ "learning_rate": 1.1360123647604328e-05,
907
+ "loss": 0.8314,
908
+ "step": 15000
909
+ },
910
+ {
911
+ "epoch": 3.89,
912
+ "learning_rate": 1.110252447192169e-05,
913
+ "loss": 0.8022,
914
+ "step": 15100
915
+ },
916
+ {
917
+ "epoch": 3.92,
918
+ "learning_rate": 1.0844925296239053e-05,
919
+ "loss": 0.8039,
920
+ "step": 15200
921
+ },
922
+ {
923
+ "epoch": 3.94,
924
+ "learning_rate": 1.0587326120556414e-05,
925
+ "loss": 0.8209,
926
+ "step": 15300
927
+ },
928
+ {
929
+ "epoch": 3.97,
930
+ "learning_rate": 1.0329726944873777e-05,
931
+ "loss": 0.8108,
932
+ "step": 15400
933
+ },
934
+ {
935
+ "epoch": 3.99,
936
+ "learning_rate": 1.007212776919114e-05,
937
+ "loss": 0.8068,
938
+ "step": 15500
939
+ },
940
+ {
941
+ "epoch": 4.02,
942
+ "learning_rate": 9.8145285935085e-06,
943
+ "loss": 0.8114,
944
+ "step": 15600
945
+ },
946
+ {
947
+ "epoch": 4.04,
948
+ "learning_rate": 9.556929417825863e-06,
949
+ "loss": 0.796,
950
+ "step": 15700
951
+ },
952
+ {
953
+ "epoch": 4.07,
954
+ "learning_rate": 9.299330242143226e-06,
955
+ "loss": 0.8125,
956
+ "step": 15800
957
+ },
958
+ {
959
+ "epoch": 4.1,
960
+ "learning_rate": 9.041731066460587e-06,
961
+ "loss": 0.7814,
962
+ "step": 15900
963
+ },
964
+ {
965
+ "epoch": 4.12,
966
+ "learning_rate": 8.78413189077795e-06,
967
+ "loss": 0.8087,
968
+ "step": 16000
969
+ },
970
+ {
971
+ "epoch": 4.15,
972
+ "learning_rate": 8.526532715095312e-06,
973
+ "loss": 0.7925,
974
+ "step": 16100
975
+ },
976
+ {
977
+ "epoch": 4.17,
978
+ "learning_rate": 8.268933539412673e-06,
979
+ "loss": 0.7946,
980
+ "step": 16200
981
+ },
982
+ {
983
+ "epoch": 4.2,
984
+ "learning_rate": 8.011334363730036e-06,
985
+ "loss": 0.8019,
986
+ "step": 16300
987
+ },
988
+ {
989
+ "epoch": 4.22,
990
+ "learning_rate": 7.753735188047398e-06,
991
+ "loss": 0.7974,
992
+ "step": 16400
993
+ },
994
+ {
995
+ "epoch": 4.25,
996
+ "learning_rate": 7.496136012364761e-06,
997
+ "loss": 0.7763,
998
+ "step": 16500
999
+ },
1000
+ {
1001
+ "epoch": 4.28,
1002
+ "learning_rate": 7.238536836682124e-06,
1003
+ "loss": 0.8158,
1004
+ "step": 16600
1005
+ },
1006
+ {
1007
+ "epoch": 4.3,
1008
+ "learning_rate": 6.9809376609994855e-06,
1009
+ "loss": 0.8042,
1010
+ "step": 16700
1011
+ },
1012
+ {
1013
+ "epoch": 4.33,
1014
+ "learning_rate": 6.723338485316847e-06,
1015
+ "loss": 0.7932,
1016
+ "step": 16800
1017
+ },
1018
+ {
1019
+ "epoch": 4.35,
1020
+ "learning_rate": 6.46573930963421e-06,
1021
+ "loss": 0.7899,
1022
+ "step": 16900
1023
+ },
1024
+ {
1025
+ "epoch": 4.38,
1026
+ "learning_rate": 6.208140133951572e-06,
1027
+ "loss": 0.7974,
1028
+ "step": 17000
1029
+ },
1030
+ {
1031
+ "epoch": 4.4,
1032
+ "learning_rate": 5.9505409582689335e-06,
1033
+ "loss": 0.8083,
1034
+ "step": 17100
1035
+ },
1036
+ {
1037
+ "epoch": 4.43,
1038
+ "learning_rate": 5.692941782586296e-06,
1039
+ "loss": 0.7924,
1040
+ "step": 17200
1041
+ },
1042
+ {
1043
+ "epoch": 4.46,
1044
+ "learning_rate": 5.435342606903658e-06,
1045
+ "loss": 0.7591,
1046
+ "step": 17300
1047
+ },
1048
+ {
1049
+ "epoch": 4.48,
1050
+ "learning_rate": 5.17774343122102e-06,
1051
+ "loss": 0.8039,
1052
+ "step": 17400
1053
+ },
1054
+ {
1055
+ "epoch": 4.51,
1056
+ "learning_rate": 4.9201442555383824e-06,
1057
+ "loss": 0.7864,
1058
+ "step": 17500
1059
+ },
1060
+ {
1061
+ "epoch": 4.53,
1062
+ "learning_rate": 4.662545079855744e-06,
1063
+ "loss": 0.779,
1064
+ "step": 17600
1065
+ },
1066
+ {
1067
+ "epoch": 4.56,
1068
+ "learning_rate": 4.404945904173107e-06,
1069
+ "loss": 0.7983,
1070
+ "step": 17700
1071
+ },
1072
+ {
1073
+ "epoch": 4.59,
1074
+ "learning_rate": 4.1473467284904695e-06,
1075
+ "loss": 0.7849,
1076
+ "step": 17800
1077
+ },
1078
+ {
1079
+ "epoch": 4.61,
1080
+ "learning_rate": 3.889747552807831e-06,
1081
+ "loss": 0.7977,
1082
+ "step": 17900
1083
+ },
1084
+ {
1085
+ "epoch": 4.64,
1086
+ "learning_rate": 3.6321483771251936e-06,
1087
+ "loss": 0.7929,
1088
+ "step": 18000
1089
+ },
1090
+ {
1091
+ "epoch": 4.66,
1092
+ "learning_rate": 3.3745492014425554e-06,
1093
+ "loss": 0.7761,
1094
+ "step": 18100
1095
+ },
1096
+ {
1097
+ "epoch": 4.69,
1098
+ "learning_rate": 3.116950025759918e-06,
1099
+ "loss": 0.7998,
1100
+ "step": 18200
1101
+ },
1102
+ {
1103
+ "epoch": 4.71,
1104
+ "learning_rate": 2.85935085007728e-06,
1105
+ "loss": 0.7863,
1106
+ "step": 18300
1107
+ },
1108
+ {
1109
+ "epoch": 4.74,
1110
+ "learning_rate": 2.601751674394642e-06,
1111
+ "loss": 0.7726,
1112
+ "step": 18400
1113
+ },
1114
+ {
1115
+ "epoch": 4.77,
1116
+ "learning_rate": 2.3441524987120042e-06,
1117
+ "loss": 0.7742,
1118
+ "step": 18500
1119
+ },
1120
+ {
1121
+ "epoch": 4.79,
1122
+ "learning_rate": 2.0865533230293665e-06,
1123
+ "loss": 0.782,
1124
+ "step": 18600
1125
+ },
1126
+ {
1127
+ "epoch": 4.82,
1128
+ "learning_rate": 1.8289541473467287e-06,
1129
+ "loss": 0.7836,
1130
+ "step": 18700
1131
+ },
1132
+ {
1133
+ "epoch": 4.84,
1134
+ "learning_rate": 1.5713549716640907e-06,
1135
+ "loss": 0.7859,
1136
+ "step": 18800
1137
+ },
1138
+ {
1139
+ "epoch": 4.87,
1140
+ "learning_rate": 1.313755795981453e-06,
1141
+ "loss": 0.8002,
1142
+ "step": 18900
1143
+ },
1144
+ {
1145
+ "epoch": 4.89,
1146
+ "learning_rate": 1.0561566202988151e-06,
1147
+ "loss": 0.7903,
1148
+ "step": 19000
1149
+ },
1150
+ {
1151
+ "epoch": 4.92,
1152
+ "learning_rate": 7.985574446161774e-07,
1153
+ "loss": 0.7888,
1154
+ "step": 19100
1155
+ },
1156
+ {
1157
+ "epoch": 4.95,
1158
+ "learning_rate": 5.409582689335394e-07,
1159
+ "loss": 0.7848,
1160
+ "step": 19200
1161
+ },
1162
+ {
1163
+ "epoch": 4.97,
1164
+ "learning_rate": 2.8335909325090165e-07,
1165
+ "loss": 0.7853,
1166
+ "step": 19300
1167
+ },
1168
+ {
1169
+ "epoch": 5.0,
1170
+ "learning_rate": 2.5759917568263784e-08,
1171
+ "loss": 0.7858,
1172
+ "step": 19400
1173
+ },
1174
+ {
1175
+ "epoch": 5.0,
1176
+ "step": 19410,
1177
+ "total_flos": 1.0472274521715917e+17,
1178
+ "train_loss": 0.3320594994939522,
1179
+ "train_runtime": 31593.7732,
1180
+ "train_samples_per_second": 9.829,
1181
+ "train_steps_per_second": 0.614
1182
  }
1183
  ],
1184
+ "max_steps": 19410,
1185
+ "num_train_epochs": 5,
1186
+ "total_flos": 1.0472274521715917e+17,
1187
  "trial_name": null,
1188
  "trial_params": null
1189
  }