Stojanco Tudzarski committed on
Commit
53faf04
1 Parent(s): 5d0e86e

110000 steps

Browse files
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +483 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee73c73a013e5106ea75ab7b12b5bc5a769b46e652fcff7d028464dfc3b159ef
3
  size 668102115
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4079a08f769039f5384ee04bea37b0133941ed92b0d2a0aaf1aabdd415e4a3a1
3
  size 668102115
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7084e2d3d3c1ddc9ca78d02f8d1aa6212d4413882334aedf22d29a957926f67
3
  size 334067506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de14c83d1079fa3df3415d4a6f3e0032b5961d15576af2ca7c7364111c32abf6
3
  size 334067506
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3260ac8e6fd861ac94b726d086012dae1d320db3cf849044d311ba5da2008b7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30834c9eacd278233301e5743cc24659976bc3e5f25f5b70bde06c4f27d0334
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.840146837977641,
5
- "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -846,11 +846,491 @@
846
  "learning_rate": 2.0799265810111798e-05,
847
  "loss": 2.8456,
848
  "step": 70000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849
  }
850
  ],
851
  "max_steps": 119860,
852
  "num_train_epochs": 10,
853
- "total_flos": 2.8729170317888717e+17,
854
  "trial_name": null,
855
  "trial_params": null
856
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.177373602536292,
5
+ "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
846
  "learning_rate": 2.0799265810111798e-05,
847
  "loss": 2.8456,
848
  "step": 70000
849
+ },
850
+ {
851
+ "epoch": 5.88,
852
+ "learning_rate": 2.0590689137326883e-05,
853
+ "loss": 2.8388,
854
+ "step": 70500
855
+ },
856
+ {
857
+ "epoch": 5.92,
858
+ "learning_rate": 2.0382112464541968e-05,
859
+ "loss": 2.8346,
860
+ "step": 71000
861
+ },
862
+ {
863
+ "epoch": 5.97,
864
+ "learning_rate": 2.017353579175705e-05,
865
+ "loss": 2.8259,
866
+ "step": 71500
867
+ },
868
+ {
869
+ "epoch": 6.01,
870
+ "learning_rate": 1.9964959118972134e-05,
871
+ "loss": 2.8211,
872
+ "step": 72000
873
+ },
874
+ {
875
+ "epoch": 6.05,
876
+ "learning_rate": 1.975638244618722e-05,
877
+ "loss": 2.7936,
878
+ "step": 72500
879
+ },
880
+ {
881
+ "epoch": 6.09,
882
+ "learning_rate": 1.9547805773402304e-05,
883
+ "loss": 2.7904,
884
+ "step": 73000
885
+ },
886
+ {
887
+ "epoch": 6.13,
888
+ "learning_rate": 1.9339229100617386e-05,
889
+ "loss": 2.7982,
890
+ "step": 73500
891
+ },
892
+ {
893
+ "epoch": 6.17,
894
+ "learning_rate": 1.913065242783247e-05,
895
+ "loss": 2.7948,
896
+ "step": 74000
897
+ },
898
+ {
899
+ "epoch": 6.22,
900
+ "learning_rate": 1.8922075755047556e-05,
901
+ "loss": 2.8152,
902
+ "step": 74500
903
+ },
904
+ {
905
+ "epoch": 6.26,
906
+ "learning_rate": 1.871349908226264e-05,
907
+ "loss": 2.7862,
908
+ "step": 75000
909
+ },
910
+ {
911
+ "epoch": 6.3,
912
+ "learning_rate": 1.8504922409477726e-05,
913
+ "loss": 2.7915,
914
+ "step": 75500
915
+ },
916
+ {
917
+ "epoch": 6.34,
918
+ "learning_rate": 1.829634573669281e-05,
919
+ "loss": 2.7796,
920
+ "step": 76000
921
+ },
922
+ {
923
+ "epoch": 6.38,
924
+ "learning_rate": 1.8087769063907896e-05,
925
+ "loss": 2.7892,
926
+ "step": 76500
927
+ },
928
+ {
929
+ "epoch": 6.42,
930
+ "learning_rate": 1.7879192391122977e-05,
931
+ "loss": 2.7827,
932
+ "step": 77000
933
+ },
934
+ {
935
+ "epoch": 6.47,
936
+ "learning_rate": 1.7670615718338062e-05,
937
+ "loss": 2.7857,
938
+ "step": 77500
939
+ },
940
+ {
941
+ "epoch": 6.51,
942
+ "learning_rate": 1.7462039045553147e-05,
943
+ "loss": 2.7889,
944
+ "step": 78000
945
+ },
946
+ {
947
+ "epoch": 6.55,
948
+ "learning_rate": 1.7253462372768232e-05,
949
+ "loss": 2.748,
950
+ "step": 78500
951
+ },
952
+ {
953
+ "epoch": 6.59,
954
+ "learning_rate": 1.7044885699983314e-05,
955
+ "loss": 2.7607,
956
+ "step": 79000
957
+ },
958
+ {
959
+ "epoch": 6.63,
960
+ "learning_rate": 1.68363090271984e-05,
961
+ "loss": 2.7597,
962
+ "step": 79500
963
+ },
964
+ {
965
+ "epoch": 6.67,
966
+ "learning_rate": 1.6627732354413484e-05,
967
+ "loss": 2.7591,
968
+ "step": 80000
969
+ },
970
+ {
971
+ "epoch": 6.72,
972
+ "learning_rate": 1.641915568162857e-05,
973
+ "loss": 2.759,
974
+ "step": 80500
975
+ },
976
+ {
977
+ "epoch": 6.76,
978
+ "learning_rate": 1.621057900884365e-05,
979
+ "loss": 2.7519,
980
+ "step": 81000
981
+ },
982
+ {
983
+ "epoch": 6.8,
984
+ "learning_rate": 1.6002002336058735e-05,
985
+ "loss": 2.745,
986
+ "step": 81500
987
+ },
988
+ {
989
+ "epoch": 6.84,
990
+ "learning_rate": 1.579342566327382e-05,
991
+ "loss": 2.7569,
992
+ "step": 82000
993
+ },
994
+ {
995
+ "epoch": 6.88,
996
+ "learning_rate": 1.5584848990488905e-05,
997
+ "loss": 2.7549,
998
+ "step": 82500
999
+ },
1000
+ {
1001
+ "epoch": 6.92,
1002
+ "learning_rate": 1.5376272317703987e-05,
1003
+ "loss": 2.748,
1004
+ "step": 83000
1005
+ },
1006
+ {
1007
+ "epoch": 6.97,
1008
+ "learning_rate": 1.5167695644919072e-05,
1009
+ "loss": 2.7653,
1010
+ "step": 83500
1011
+ },
1012
+ {
1013
+ "epoch": 7.01,
1014
+ "learning_rate": 1.4959118972134157e-05,
1015
+ "loss": 2.7258,
1016
+ "step": 84000
1017
+ },
1018
+ {
1019
+ "epoch": 7.05,
1020
+ "learning_rate": 1.475054229934924e-05,
1021
+ "loss": 2.729,
1022
+ "step": 84500
1023
+ },
1024
+ {
1025
+ "epoch": 7.09,
1026
+ "learning_rate": 1.4541965626564325e-05,
1027
+ "loss": 2.7198,
1028
+ "step": 85000
1029
+ },
1030
+ {
1031
+ "epoch": 7.13,
1032
+ "learning_rate": 1.4333388953779412e-05,
1033
+ "loss": 2.7184,
1034
+ "step": 85500
1035
+ },
1036
+ {
1037
+ "epoch": 7.18,
1038
+ "learning_rate": 1.4124812280994495e-05,
1039
+ "loss": 2.7079,
1040
+ "step": 86000
1041
+ },
1042
+ {
1043
+ "epoch": 7.22,
1044
+ "learning_rate": 1.391623560820958e-05,
1045
+ "loss": 2.7243,
1046
+ "step": 86500
1047
+ },
1048
+ {
1049
+ "epoch": 7.26,
1050
+ "learning_rate": 1.3707658935424663e-05,
1051
+ "loss": 2.6942,
1052
+ "step": 87000
1053
+ },
1054
+ {
1055
+ "epoch": 7.3,
1056
+ "learning_rate": 1.3499082262639748e-05,
1057
+ "loss": 2.7002,
1058
+ "step": 87500
1059
+ },
1060
+ {
1061
+ "epoch": 7.34,
1062
+ "learning_rate": 1.3290505589854831e-05,
1063
+ "loss": 2.7085,
1064
+ "step": 88000
1065
+ },
1066
+ {
1067
+ "epoch": 7.38,
1068
+ "learning_rate": 1.3081928917069916e-05,
1069
+ "loss": 2.6986,
1070
+ "step": 88500
1071
+ },
1072
+ {
1073
+ "epoch": 7.43,
1074
+ "learning_rate": 1.2873352244285e-05,
1075
+ "loss": 2.7054,
1076
+ "step": 89000
1077
+ },
1078
+ {
1079
+ "epoch": 7.47,
1080
+ "learning_rate": 1.2664775571500085e-05,
1081
+ "loss": 2.7214,
1082
+ "step": 89500
1083
+ },
1084
+ {
1085
+ "epoch": 7.51,
1086
+ "learning_rate": 1.2456198898715168e-05,
1087
+ "loss": 2.7054,
1088
+ "step": 90000
1089
+ },
1090
+ {
1091
+ "epoch": 7.55,
1092
+ "learning_rate": 1.2247622225930253e-05,
1093
+ "loss": 2.7054,
1094
+ "step": 90500
1095
+ },
1096
+ {
1097
+ "epoch": 7.59,
1098
+ "learning_rate": 1.2039045553145336e-05,
1099
+ "loss": 2.7097,
1100
+ "step": 91000
1101
+ },
1102
+ {
1103
+ "epoch": 7.63,
1104
+ "learning_rate": 1.1830468880360421e-05,
1105
+ "loss": 2.6911,
1106
+ "step": 91500
1107
+ },
1108
+ {
1109
+ "epoch": 7.68,
1110
+ "learning_rate": 1.1621892207575505e-05,
1111
+ "loss": 2.6768,
1112
+ "step": 92000
1113
+ },
1114
+ {
1115
+ "epoch": 7.72,
1116
+ "learning_rate": 1.141331553479059e-05,
1117
+ "loss": 2.6735,
1118
+ "step": 92500
1119
+ },
1120
+ {
1121
+ "epoch": 7.76,
1122
+ "learning_rate": 1.1204738862005673e-05,
1123
+ "loss": 2.703,
1124
+ "step": 93000
1125
+ },
1126
+ {
1127
+ "epoch": 7.8,
1128
+ "learning_rate": 1.099616218922076e-05,
1129
+ "loss": 2.6704,
1130
+ "step": 93500
1131
+ },
1132
+ {
1133
+ "epoch": 7.84,
1134
+ "learning_rate": 1.0787585516435843e-05,
1135
+ "loss": 2.6833,
1136
+ "step": 94000
1137
+ },
1138
+ {
1139
+ "epoch": 7.88,
1140
+ "learning_rate": 1.0579008843650926e-05,
1141
+ "loss": 2.6809,
1142
+ "step": 94500
1143
+ },
1144
+ {
1145
+ "epoch": 7.93,
1146
+ "learning_rate": 1.0370432170866011e-05,
1147
+ "loss": 2.6766,
1148
+ "step": 95000
1149
+ },
1150
+ {
1151
+ "epoch": 7.97,
1152
+ "learning_rate": 1.0161855498081094e-05,
1153
+ "loss": 2.6719,
1154
+ "step": 95500
1155
+ },
1156
+ {
1157
+ "epoch": 8.01,
1158
+ "learning_rate": 9.95327882529618e-06,
1159
+ "loss": 2.6801,
1160
+ "step": 96000
1161
+ },
1162
+ {
1163
+ "epoch": 8.05,
1164
+ "learning_rate": 9.744702152511263e-06,
1165
+ "loss": 2.6555,
1166
+ "step": 96500
1167
+ },
1168
+ {
1169
+ "epoch": 8.09,
1170
+ "learning_rate": 9.536125479726348e-06,
1171
+ "loss": 2.656,
1172
+ "step": 97000
1173
+ },
1174
+ {
1175
+ "epoch": 8.13,
1176
+ "learning_rate": 9.32754880694143e-06,
1177
+ "loss": 2.6629,
1178
+ "step": 97500
1179
+ },
1180
+ {
1181
+ "epoch": 8.18,
1182
+ "learning_rate": 9.118972134156516e-06,
1183
+ "loss": 2.6591,
1184
+ "step": 98000
1185
+ },
1186
+ {
1187
+ "epoch": 8.22,
1188
+ "learning_rate": 8.9103954613716e-06,
1189
+ "loss": 2.6521,
1190
+ "step": 98500
1191
+ },
1192
+ {
1193
+ "epoch": 8.26,
1194
+ "learning_rate": 8.701818788586686e-06,
1195
+ "loss": 2.6594,
1196
+ "step": 99000
1197
+ },
1198
+ {
1199
+ "epoch": 8.3,
1200
+ "learning_rate": 8.493242115801769e-06,
1201
+ "loss": 2.65,
1202
+ "step": 99500
1203
+ },
1204
+ {
1205
+ "epoch": 8.34,
1206
+ "learning_rate": 8.284665443016854e-06,
1207
+ "loss": 2.6492,
1208
+ "step": 100000
1209
+ },
1210
+ {
1211
+ "epoch": 8.38,
1212
+ "learning_rate": 8.076088770231937e-06,
1213
+ "loss": 2.6443,
1214
+ "step": 100500
1215
+ },
1216
+ {
1217
+ "epoch": 8.43,
1218
+ "learning_rate": 7.867512097447022e-06,
1219
+ "loss": 2.6567,
1220
+ "step": 101000
1221
+ },
1222
+ {
1223
+ "epoch": 8.47,
1224
+ "learning_rate": 7.658935424662106e-06,
1225
+ "loss": 2.6213,
1226
+ "step": 101500
1227
+ },
1228
+ {
1229
+ "epoch": 8.51,
1230
+ "learning_rate": 7.45035875187719e-06,
1231
+ "loss": 2.6659,
1232
+ "step": 102000
1233
+ },
1234
+ {
1235
+ "epoch": 8.55,
1236
+ "learning_rate": 7.241782079092274e-06,
1237
+ "loss": 2.6456,
1238
+ "step": 102500
1239
+ },
1240
+ {
1241
+ "epoch": 8.59,
1242
+ "learning_rate": 7.033205406307358e-06,
1243
+ "loss": 2.6431,
1244
+ "step": 103000
1245
+ },
1246
+ {
1247
+ "epoch": 8.64,
1248
+ "learning_rate": 6.824628733522444e-06,
1249
+ "loss": 2.6418,
1250
+ "step": 103500
1251
+ },
1252
+ {
1253
+ "epoch": 8.68,
1254
+ "learning_rate": 6.616052060737528e-06,
1255
+ "loss": 2.6437,
1256
+ "step": 104000
1257
+ },
1258
+ {
1259
+ "epoch": 8.72,
1260
+ "learning_rate": 6.407475387952612e-06,
1261
+ "loss": 2.6398,
1262
+ "step": 104500
1263
+ },
1264
+ {
1265
+ "epoch": 8.76,
1266
+ "learning_rate": 6.198898715167696e-06,
1267
+ "loss": 2.6565,
1268
+ "step": 105000
1269
+ },
1270
+ {
1271
+ "epoch": 8.8,
1272
+ "learning_rate": 5.99032204238278e-06,
1273
+ "loss": 2.6403,
1274
+ "step": 105500
1275
+ },
1276
+ {
1277
+ "epoch": 8.84,
1278
+ "learning_rate": 5.7817453695978644e-06,
1279
+ "loss": 2.6243,
1280
+ "step": 106000
1281
+ },
1282
+ {
1283
+ "epoch": 8.89,
1284
+ "learning_rate": 5.5731686968129486e-06,
1285
+ "loss": 2.6253,
1286
+ "step": 106500
1287
+ },
1288
+ {
1289
+ "epoch": 8.93,
1290
+ "learning_rate": 5.3645920240280335e-06,
1291
+ "loss": 2.6358,
1292
+ "step": 107000
1293
+ },
1294
+ {
1295
+ "epoch": 8.97,
1296
+ "learning_rate": 5.156015351243118e-06,
1297
+ "loss": 2.6267,
1298
+ "step": 107500
1299
+ },
1300
+ {
1301
+ "epoch": 9.01,
1302
+ "learning_rate": 4.947438678458202e-06,
1303
+ "loss": 2.6483,
1304
+ "step": 108000
1305
+ },
1306
+ {
1307
+ "epoch": 9.05,
1308
+ "learning_rate": 4.738862005673286e-06,
1309
+ "loss": 2.6325,
1310
+ "step": 108500
1311
+ },
1312
+ {
1313
+ "epoch": 9.09,
1314
+ "learning_rate": 4.53028533288837e-06,
1315
+ "loss": 2.6104,
1316
+ "step": 109000
1317
+ },
1318
+ {
1319
+ "epoch": 9.14,
1320
+ "learning_rate": 4.321708660103454e-06,
1321
+ "loss": 2.6263,
1322
+ "step": 109500
1323
+ },
1324
+ {
1325
+ "epoch": 9.18,
1326
+ "learning_rate": 4.113131987318538e-06,
1327
+ "loss": 2.6118,
1328
+ "step": 110000
1329
  }
1330
  ],
1331
  "max_steps": 119860,
1332
  "num_train_epochs": 10,
1333
+ "total_flos": 4.514572271830057e+17,
1334
  "trial_name": null,
1335
  "trial_params": null
1336
  }