Ubuntu commited on
Commit
780df45
1 Parent(s): cc48966
Files changed (2) hide show
  1. pytorch_model.bin +1 -1
  2. trainer_state.json +629 -3
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:188b012cec3039d8440f0980b517a0c0e7c8993f8a4f07b2854d3b2f700b2494
3
  size 24673403925
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f045975060d8226ff3daf019b77d5f576d821becbdcceea3e81eb46457d6ff70
3
  size 24673403925
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.12664021971315229,
5
- "global_step": 1660,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -688,11 +688,637 @@
688
  "eval_samples_per_second": 8.622,
689
  "eval_steps_per_second": 0.386,
690
  "step": 1660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
  }
692
  ],
693
  "max_steps": 13108,
694
  "num_train_epochs": 1,
695
- "total_flos": 24131860955136.0,
696
  "trial_name": null,
697
  "trial_params": null
698
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.24259993896856882,
5
+ "global_step": 3180,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
688
  "eval_samples_per_second": 8.622,
689
  "eval_steps_per_second": 0.386,
690
  "step": 1660
691
+ },
692
+ {
693
+ "epoch": 0.13,
694
+ "eval_loss": 2.1107728481292725,
695
+ "eval_runtime": 35.8293,
696
+ "eval_samples_per_second": 8.736,
697
+ "eval_steps_per_second": 0.391,
698
+ "step": 1680
699
+ },
700
+ {
701
+ "epoch": 0.13,
702
+ "eval_loss": 2.1059305667877197,
703
+ "eval_runtime": 38.1951,
704
+ "eval_samples_per_second": 8.195,
705
+ "eval_steps_per_second": 0.367,
706
+ "step": 1700
707
+ },
708
+ {
709
+ "epoch": 0.13,
710
+ "eval_loss": 2.109574794769287,
711
+ "eval_runtime": 36.6746,
712
+ "eval_samples_per_second": 8.535,
713
+ "eval_steps_per_second": 0.382,
714
+ "step": 1720
715
+ },
716
+ {
717
+ "epoch": 0.13,
718
+ "eval_loss": 2.1008386611938477,
719
+ "eval_runtime": 38.8587,
720
+ "eval_samples_per_second": 8.055,
721
+ "eval_steps_per_second": 0.36,
722
+ "step": 1740
723
+ },
724
+ {
725
+ "epoch": 0.13,
726
+ "eval_loss": 2.1023361682891846,
727
+ "eval_runtime": 35.8353,
728
+ "eval_samples_per_second": 8.734,
729
+ "eval_steps_per_second": 0.391,
730
+ "step": 1760
731
+ },
732
+ {
733
+ "epoch": 0.14,
734
+ "eval_loss": 2.1008386611938477,
735
+ "eval_runtime": 36.6886,
736
+ "eval_samples_per_second": 8.531,
737
+ "eval_steps_per_second": 0.382,
738
+ "step": 1780
739
+ },
740
+ {
741
+ "epoch": 0.14,
742
+ "eval_loss": 2.093350648880005,
743
+ "eval_runtime": 38.2167,
744
+ "eval_samples_per_second": 8.19,
745
+ "eval_steps_per_second": 0.366,
746
+ "step": 1800
747
+ },
748
+ {
749
+ "epoch": 0.14,
750
+ "eval_loss": 2.1041831970214844,
751
+ "eval_runtime": 36.3105,
752
+ "eval_samples_per_second": 8.62,
753
+ "eval_steps_per_second": 0.386,
754
+ "step": 1820
755
+ },
756
+ {
757
+ "epoch": 0.14,
758
+ "eval_loss": 2.0942492485046387,
759
+ "eval_runtime": 37.4668,
760
+ "eval_samples_per_second": 8.354,
761
+ "eval_steps_per_second": 0.374,
762
+ "step": 1840
763
+ },
764
+ {
765
+ "epoch": 0.14,
766
+ "eval_loss": 2.0858376026153564,
767
+ "eval_runtime": 36.0576,
768
+ "eval_samples_per_second": 8.681,
769
+ "eval_steps_per_second": 0.388,
770
+ "step": 1860
771
+ },
772
+ {
773
+ "epoch": 0.14,
774
+ "eval_loss": 2.1020865440368652,
775
+ "eval_runtime": 37.7141,
776
+ "eval_samples_per_second": 8.299,
777
+ "eval_steps_per_second": 0.371,
778
+ "step": 1880
779
+ },
780
+ {
781
+ "epoch": 0.14,
782
+ "eval_loss": 2.1026856899261475,
783
+ "eval_runtime": 35.4823,
784
+ "eval_samples_per_second": 8.821,
785
+ "eval_steps_per_second": 0.395,
786
+ "step": 1900
787
+ },
788
+ {
789
+ "epoch": 0.15,
790
+ "eval_loss": 2.0936501026153564,
791
+ "eval_runtime": 37.4147,
792
+ "eval_samples_per_second": 8.366,
793
+ "eval_steps_per_second": 0.374,
794
+ "step": 1920
795
+ },
796
+ {
797
+ "epoch": 0.15,
798
+ "eval_loss": 2.0930511951446533,
799
+ "eval_runtime": 37.4908,
800
+ "eval_samples_per_second": 8.349,
801
+ "eval_steps_per_second": 0.373,
802
+ "step": 1940
803
+ },
804
+ {
805
+ "epoch": 0.15,
806
+ "eval_loss": 2.0927765369415283,
807
+ "eval_runtime": 35.6866,
808
+ "eval_samples_per_second": 8.771,
809
+ "eval_steps_per_second": 0.392,
810
+ "step": 1960
811
+ },
812
+ {
813
+ "epoch": 0.15,
814
+ "eval_loss": 2.0972445011138916,
815
+ "eval_runtime": 36.708,
816
+ "eval_samples_per_second": 8.527,
817
+ "eval_steps_per_second": 0.381,
818
+ "step": 1980
819
+ },
820
+ {
821
+ "epoch": 0.15,
822
+ "learning_rate": 5e-05,
823
+ "loss": 1.9023,
824
+ "step": 2000
825
+ },
826
+ {
827
+ "epoch": 0.15,
828
+ "eval_loss": 2.0981428623199463,
829
+ "eval_runtime": 37.7854,
830
+ "eval_samples_per_second": 8.284,
831
+ "eval_steps_per_second": 0.371,
832
+ "step": 2000
833
+ },
834
+ {
835
+ "epoch": 0.15,
836
+ "eval_loss": 2.0930511951446533,
837
+ "eval_runtime": 35.9143,
838
+ "eval_samples_per_second": 8.715,
839
+ "eval_steps_per_second": 0.39,
840
+ "step": 2020
841
+ },
842
+ {
843
+ "epoch": 0.16,
844
+ "eval_loss": 2.0959465503692627,
845
+ "eval_runtime": 36.7602,
846
+ "eval_samples_per_second": 8.515,
847
+ "eval_steps_per_second": 0.381,
848
+ "step": 2040
849
+ },
850
+ {
851
+ "epoch": 0.16,
852
+ "eval_loss": 2.094498872756958,
853
+ "eval_runtime": 35.6393,
854
+ "eval_samples_per_second": 8.782,
855
+ "eval_steps_per_second": 0.393,
856
+ "step": 2060
857
+ },
858
+ {
859
+ "epoch": 0.16,
860
+ "eval_loss": 2.10168719291687,
861
+ "eval_runtime": 36.6139,
862
+ "eval_samples_per_second": 8.549,
863
+ "eval_steps_per_second": 0.382,
864
+ "step": 2080
865
+ },
866
+ {
867
+ "epoch": 0.16,
868
+ "eval_loss": 2.089132308959961,
869
+ "eval_runtime": 37.1222,
870
+ "eval_samples_per_second": 8.432,
871
+ "eval_steps_per_second": 0.377,
872
+ "step": 2100
873
+ },
874
+ {
875
+ "epoch": 0.16,
876
+ "eval_loss": 2.0930261611938477,
877
+ "eval_runtime": 36.6415,
878
+ "eval_samples_per_second": 8.542,
879
+ "eval_steps_per_second": 0.382,
880
+ "step": 2120
881
+ },
882
+ {
883
+ "epoch": 0.16,
884
+ "eval_loss": 2.0837409496307373,
885
+ "eval_runtime": 35.3492,
886
+ "eval_samples_per_second": 8.855,
887
+ "eval_steps_per_second": 0.396,
888
+ "step": 2140
889
+ },
890
+ {
891
+ "epoch": 0.16,
892
+ "eval_loss": 2.0924770832061768,
893
+ "eval_runtime": 35.8577,
894
+ "eval_samples_per_second": 8.729,
895
+ "eval_steps_per_second": 0.39,
896
+ "step": 2160
897
+ },
898
+ {
899
+ "epoch": 0.17,
900
+ "eval_loss": 2.0876598358154297,
901
+ "eval_runtime": 36.7333,
902
+ "eval_samples_per_second": 8.521,
903
+ "eval_steps_per_second": 0.381,
904
+ "step": 2180
905
+ },
906
+ {
907
+ "epoch": 0.17,
908
+ "eval_loss": 2.0903303623199463,
909
+ "eval_runtime": 36.5753,
910
+ "eval_samples_per_second": 8.558,
911
+ "eval_steps_per_second": 0.383,
912
+ "step": 2200
913
+ },
914
+ {
915
+ "epoch": 0.17,
916
+ "eval_loss": 2.0882089138031006,
917
+ "eval_runtime": 37.2231,
918
+ "eval_samples_per_second": 8.409,
919
+ "eval_steps_per_second": 0.376,
920
+ "step": 2220
921
+ },
922
+ {
923
+ "epoch": 0.17,
924
+ "eval_loss": 2.0914785861968994,
925
+ "eval_runtime": 35.3728,
926
+ "eval_samples_per_second": 8.849,
927
+ "eval_steps_per_second": 0.396,
928
+ "step": 2240
929
+ },
930
+ {
931
+ "epoch": 0.17,
932
+ "eval_loss": 2.092726707458496,
933
+ "eval_runtime": 40.8988,
934
+ "eval_samples_per_second": 7.653,
935
+ "eval_steps_per_second": 0.342,
936
+ "step": 2260
937
+ },
938
+ {
939
+ "epoch": 0.17,
940
+ "eval_loss": 2.092102527618408,
941
+ "eval_runtime": 38.6861,
942
+ "eval_samples_per_second": 8.091,
943
+ "eval_steps_per_second": 0.362,
944
+ "step": 2280
945
+ },
946
+ {
947
+ "epoch": 0.18,
948
+ "eval_loss": 2.0902554988861084,
949
+ "eval_runtime": 37.1228,
950
+ "eval_samples_per_second": 8.431,
951
+ "eval_steps_per_second": 0.377,
952
+ "step": 2300
953
+ },
954
+ {
955
+ "epoch": 0.18,
956
+ "eval_loss": 2.1011133193969727,
957
+ "eval_runtime": 38.2059,
958
+ "eval_samples_per_second": 8.192,
959
+ "eval_steps_per_second": 0.366,
960
+ "step": 2320
961
+ },
962
+ {
963
+ "epoch": 0.18,
964
+ "eval_loss": 2.0915534496307373,
965
+ "eval_runtime": 37.7371,
966
+ "eval_samples_per_second": 8.294,
967
+ "eval_steps_per_second": 0.371,
968
+ "step": 2340
969
+ },
970
+ {
971
+ "epoch": 0.18,
972
+ "eval_loss": 2.084639549255371,
973
+ "eval_runtime": 37.4914,
974
+ "eval_samples_per_second": 8.349,
975
+ "eval_steps_per_second": 0.373,
976
+ "step": 2360
977
+ },
978
+ {
979
+ "epoch": 0.18,
980
+ "eval_loss": 2.0891075134277344,
981
+ "eval_runtime": 37.0809,
982
+ "eval_samples_per_second": 8.441,
983
+ "eval_steps_per_second": 0.378,
984
+ "step": 2380
985
+ },
986
+ {
987
+ "epoch": 0.18,
988
+ "eval_loss": 2.080421209335327,
989
+ "eval_runtime": 38.2834,
990
+ "eval_samples_per_second": 8.176,
991
+ "eval_steps_per_second": 0.366,
992
+ "step": 2400
993
+ },
994
+ {
995
+ "epoch": 0.18,
996
+ "eval_loss": 2.0774011611938477,
997
+ "eval_runtime": 36.0821,
998
+ "eval_samples_per_second": 8.675,
999
+ "eval_steps_per_second": 0.388,
1000
+ "step": 2420
1001
+ },
1002
+ {
1003
+ "epoch": 0.19,
1004
+ "eval_loss": 2.074655532836914,
1005
+ "eval_runtime": 38.4174,
1006
+ "eval_samples_per_second": 8.147,
1007
+ "eval_steps_per_second": 0.364,
1008
+ "step": 2440
1009
+ },
1010
+ {
1011
+ "epoch": 0.19,
1012
+ "eval_loss": 2.0918281078338623,
1013
+ "eval_runtime": 37.5135,
1014
+ "eval_samples_per_second": 8.344,
1015
+ "eval_steps_per_second": 0.373,
1016
+ "step": 2460
1017
+ },
1018
+ {
1019
+ "epoch": 0.19,
1020
+ "eval_loss": 2.0866613388061523,
1021
+ "eval_runtime": 37.8846,
1022
+ "eval_samples_per_second": 8.262,
1023
+ "eval_steps_per_second": 0.37,
1024
+ "step": 2480
1025
+ },
1026
+ {
1027
+ "epoch": 0.19,
1028
+ "learning_rate": 5e-05,
1029
+ "loss": 1.8656,
1030
+ "step": 2500
1031
+ },
1032
+ {
1033
+ "epoch": 0.19,
1034
+ "eval_loss": 2.0818939208984375,
1035
+ "eval_runtime": 37.1254,
1036
+ "eval_samples_per_second": 8.431,
1037
+ "eval_steps_per_second": 0.377,
1038
+ "step": 2500
1039
+ },
1040
+ {
1041
+ "epoch": 0.19,
1042
+ "eval_loss": 2.082193374633789,
1043
+ "eval_runtime": 37.2165,
1044
+ "eval_samples_per_second": 8.41,
1045
+ "eval_steps_per_second": 0.376,
1046
+ "step": 2520
1047
+ },
1048
+ {
1049
+ "epoch": 0.19,
1050
+ "eval_loss": 2.078274726867676,
1051
+ "eval_runtime": 38.1535,
1052
+ "eval_samples_per_second": 8.204,
1053
+ "eval_steps_per_second": 0.367,
1054
+ "step": 2540
1055
+ },
1056
+ {
1057
+ "epoch": 0.2,
1058
+ "eval_loss": 2.0924270153045654,
1059
+ "eval_runtime": 37.0529,
1060
+ "eval_samples_per_second": 8.447,
1061
+ "eval_steps_per_second": 0.378,
1062
+ "step": 2560
1063
+ },
1064
+ {
1065
+ "epoch": 0.2,
1066
+ "eval_loss": 2.0776758193969727,
1067
+ "eval_runtime": 38.2095,
1068
+ "eval_samples_per_second": 8.192,
1069
+ "eval_steps_per_second": 0.366,
1070
+ "step": 2580
1071
+ },
1072
+ {
1073
+ "epoch": 0.2,
1074
+ "eval_loss": 2.074331045150757,
1075
+ "eval_runtime": 38.0087,
1076
+ "eval_samples_per_second": 8.235,
1077
+ "eval_steps_per_second": 0.368,
1078
+ "step": 2600
1079
+ },
1080
+ {
1081
+ "epoch": 0.2,
1082
+ "eval_loss": 2.0753045082092285,
1083
+ "eval_runtime": 35.62,
1084
+ "eval_samples_per_second": 8.787,
1085
+ "eval_steps_per_second": 0.393,
1086
+ "step": 2620
1087
+ },
1088
+ {
1089
+ "epoch": 0.2,
1090
+ "eval_loss": 2.0662689208984375,
1091
+ "eval_runtime": 42.1091,
1092
+ "eval_samples_per_second": 7.433,
1093
+ "eval_steps_per_second": 0.332,
1094
+ "step": 2640
1095
+ },
1096
+ {
1097
+ "epoch": 0.2,
1098
+ "eval_loss": 2.066293954849243,
1099
+ "eval_runtime": 39.286,
1100
+ "eval_samples_per_second": 7.967,
1101
+ "eval_steps_per_second": 0.356,
1102
+ "step": 2660
1103
+ },
1104
+ {
1105
+ "epoch": 0.2,
1106
+ "eval_loss": 2.0750298500061035,
1107
+ "eval_runtime": 37.7908,
1108
+ "eval_samples_per_second": 8.282,
1109
+ "eval_steps_per_second": 0.37,
1110
+ "step": 2680
1111
+ },
1112
+ {
1113
+ "epoch": 0.21,
1114
+ "eval_loss": 2.072883367538452,
1115
+ "eval_runtime": 36.9744,
1116
+ "eval_samples_per_second": 8.465,
1117
+ "eval_steps_per_second": 0.379,
1118
+ "step": 2700
1119
+ },
1120
+ {
1121
+ "epoch": 0.21,
1122
+ "eval_loss": 2.0656700134277344,
1123
+ "eval_runtime": 38.6743,
1124
+ "eval_samples_per_second": 8.093,
1125
+ "eval_steps_per_second": 0.362,
1126
+ "step": 2720
1127
+ },
1128
+ {
1129
+ "epoch": 0.21,
1130
+ "eval_loss": 2.061077356338501,
1131
+ "eval_runtime": 37.2607,
1132
+ "eval_samples_per_second": 8.4,
1133
+ "eval_steps_per_second": 0.376,
1134
+ "step": 2740
1135
+ },
1136
+ {
1137
+ "epoch": 0.21,
1138
+ "eval_loss": 2.0596296787261963,
1139
+ "eval_runtime": 38.4938,
1140
+ "eval_samples_per_second": 8.131,
1141
+ "eval_steps_per_second": 0.364,
1142
+ "step": 2760
1143
+ },
1144
+ {
1145
+ "epoch": 0.21,
1146
+ "eval_loss": 2.0695137977600098,
1147
+ "eval_runtime": 38.4555,
1148
+ "eval_samples_per_second": 8.139,
1149
+ "eval_steps_per_second": 0.364,
1150
+ "step": 2780
1151
+ },
1152
+ {
1153
+ "epoch": 0.21,
1154
+ "eval_loss": 2.0653703212738037,
1155
+ "eval_runtime": 40.8818,
1156
+ "eval_samples_per_second": 7.656,
1157
+ "eval_steps_per_second": 0.342,
1158
+ "step": 2800
1159
+ },
1160
+ {
1161
+ "epoch": 0.22,
1162
+ "eval_loss": 2.0632736682891846,
1163
+ "eval_runtime": 37.3448,
1164
+ "eval_samples_per_second": 8.381,
1165
+ "eval_steps_per_second": 0.375,
1166
+ "step": 2820
1167
+ },
1168
+ {
1169
+ "epoch": 0.22,
1170
+ "eval_loss": 2.068690061569214,
1171
+ "eval_runtime": 38.9945,
1172
+ "eval_samples_per_second": 8.027,
1173
+ "eval_steps_per_second": 0.359,
1174
+ "step": 2840
1175
+ },
1176
+ {
1177
+ "epoch": 0.22,
1178
+ "eval_loss": 2.0744807720184326,
1179
+ "eval_runtime": 37.3809,
1180
+ "eval_samples_per_second": 8.373,
1181
+ "eval_steps_per_second": 0.375,
1182
+ "step": 2860
1183
+ },
1184
+ {
1185
+ "epoch": 0.22,
1186
+ "eval_loss": 2.068140983581543,
1187
+ "eval_runtime": 37.0455,
1188
+ "eval_samples_per_second": 8.449,
1189
+ "eval_steps_per_second": 0.378,
1190
+ "step": 2880
1191
+ },
1192
+ {
1193
+ "epoch": 0.22,
1194
+ "eval_loss": 2.0711112022399902,
1195
+ "eval_runtime": 36.5798,
1196
+ "eval_samples_per_second": 8.557,
1197
+ "eval_steps_per_second": 0.383,
1198
+ "step": 2900
1199
+ },
1200
+ {
1201
+ "epoch": 0.22,
1202
+ "eval_loss": 2.0659945011138916,
1203
+ "eval_runtime": 37.3716,
1204
+ "eval_samples_per_second": 8.375,
1205
+ "eval_steps_per_second": 0.375,
1206
+ "step": 2920
1207
+ },
1208
+ {
1209
+ "epoch": 0.22,
1210
+ "eval_loss": 2.076228141784668,
1211
+ "eval_runtime": 38.1126,
1212
+ "eval_samples_per_second": 8.213,
1213
+ "eval_steps_per_second": 0.367,
1214
+ "step": 2940
1215
+ },
1216
+ {
1217
+ "epoch": 0.23,
1218
+ "eval_loss": 2.072284460067749,
1219
+ "eval_runtime": 37.7328,
1220
+ "eval_samples_per_second": 8.295,
1221
+ "eval_steps_per_second": 0.371,
1222
+ "step": 2960
1223
+ },
1224
+ {
1225
+ "epoch": 0.23,
1226
+ "eval_loss": 2.0797972679138184,
1227
+ "eval_runtime": 39.3148,
1228
+ "eval_samples_per_second": 7.961,
1229
+ "eval_steps_per_second": 0.356,
1230
+ "step": 2980
1231
+ },
1232
+ {
1233
+ "epoch": 0.23,
1234
+ "learning_rate": 5e-05,
1235
+ "loss": 1.8034,
1236
+ "step": 3000
1237
+ },
1238
+ {
1239
+ "epoch": 0.23,
1240
+ "eval_loss": 2.0818939208984375,
1241
+ "eval_runtime": 37.1291,
1242
+ "eval_samples_per_second": 8.43,
1243
+ "eval_steps_per_second": 0.377,
1244
+ "step": 3000
1245
+ },
1246
+ {
1247
+ "epoch": 0.23,
1248
+ "eval_loss": 2.0645217895507812,
1249
+ "eval_runtime": 38.134,
1250
+ "eval_samples_per_second": 8.208,
1251
+ "eval_steps_per_second": 0.367,
1252
+ "step": 3020
1253
+ },
1254
+ {
1255
+ "epoch": 0.23,
1256
+ "eval_loss": 2.0636231899261475,
1257
+ "eval_runtime": 38.8835,
1258
+ "eval_samples_per_second": 8.05,
1259
+ "eval_steps_per_second": 0.36,
1260
+ "step": 3040
1261
+ },
1262
+ {
1263
+ "epoch": 0.23,
1264
+ "eval_loss": 2.0678415298461914,
1265
+ "eval_runtime": 38.0811,
1266
+ "eval_samples_per_second": 8.219,
1267
+ "eval_steps_per_second": 0.368,
1268
+ "step": 3060
1269
+ },
1270
+ {
1271
+ "epoch": 0.23,
1272
+ "eval_loss": 2.0711112022399902,
1273
+ "eval_runtime": 38.2925,
1274
+ "eval_samples_per_second": 8.174,
1275
+ "eval_steps_per_second": 0.366,
1276
+ "step": 3080
1277
+ },
1278
+ {
1279
+ "epoch": 0.24,
1280
+ "eval_loss": 2.063648223876953,
1281
+ "eval_runtime": 37.5261,
1282
+ "eval_samples_per_second": 8.341,
1283
+ "eval_steps_per_second": 0.373,
1284
+ "step": 3100
1285
+ },
1286
+ {
1287
+ "epoch": 0.24,
1288
+ "eval_loss": 2.0624501705169678,
1289
+ "eval_runtime": 37.6407,
1290
+ "eval_samples_per_second": 8.315,
1291
+ "eval_steps_per_second": 0.372,
1292
+ "step": 3120
1293
+ },
1294
+ {
1295
+ "epoch": 0.24,
1296
+ "eval_loss": 2.0669429302215576,
1297
+ "eval_runtime": 36.2761,
1298
+ "eval_samples_per_second": 8.628,
1299
+ "eval_steps_per_second": 0.386,
1300
+ "step": 3140
1301
+ },
1302
+ {
1303
+ "epoch": 0.24,
1304
+ "eval_loss": 2.056734323501587,
1305
+ "eval_runtime": 39.277,
1306
+ "eval_samples_per_second": 7.969,
1307
+ "eval_steps_per_second": 0.356,
1308
+ "step": 3160
1309
+ },
1310
+ {
1311
+ "epoch": 0.24,
1312
+ "eval_loss": 2.0456268787384033,
1313
+ "eval_runtime": 37.5675,
1314
+ "eval_samples_per_second": 8.332,
1315
+ "eval_steps_per_second": 0.373,
1316
+ "step": 3180
1317
  }
1318
  ],
1319
  "max_steps": 13108,
1320
  "num_train_epochs": 1,
1321
+ "total_flos": 46808734629888.0,
1322
  "trial_name": null,
1323
  "trial_params": null
1324
  }