0xsuid committed on
Commit
8055c91
•
1 Parent(s): 2393ff3

Training in progress, step 1140

Browse files
last-checkpoint/{global_step857 → global_step1143}/mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3a630eb85c546e2a07ac67c656eb0e589ff4107478d6b2543b9322f0fbe72b1
3
  size 5363072554
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30eb2fb488eaec8a27bb0b26a171963d3112a31c76479ce8dfdd718e9567bf2
3
  size 5363072554
last-checkpoint/{global_step857 → global_step1143}/zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b95e0c9296447f3b32cf983d2d32a532f3f8ca8faae670015e519c60d435217c
3
  size 3946735038
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e6445cc536442d208833ad59863f984bc71140490a8cd3e18575006fcdd4e1
3
  size 3946735038
last-checkpoint/{global_step857 → global_step1143}/zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4662a42a5837ec98df8be88d9356eb7a10489c8c460ae7e651b186a0b1f0cb4e
3
  size 3946736318
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11f87ca6a8d81d91ce9669ce0a0b132e16e31c589165b9dfc0f95bec607a1ce3
3
  size 3946736318
last-checkpoint/{global_step857 → global_step1143}/zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a214156058270c73328399c945dc9c19b7da43012c26fa5049715b294e26fee8
3
  size 3946737086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:076063a2d14362bcc0562f87c7d5497798acb4d6078c32ba220db1ed473f33c8
3
  size 3946737086
last-checkpoint/{global_step857 → global_step1143}/zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af259a1a8fa70e65efaa3a50e1cac3ec7dcea2ec7741117ac8635c6ebe92b5a2
3
  size 3946736574
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e8189f9e1fe3d7e7bb628368a866880f0176b1f62f4c271d4ccf25c9e8e98a
3
  size 3946736574
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step857
 
1
+ global_step1143
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51a52db4c4e75a7734032ea56f6f10e21c679ce9cc10bf8afa9a21d755ad0786
3
  size 5363024236
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d85bffffb2fe97ca10f0460ebc0b029a11e6d606ade9c54de58bcb6de72ec8
3
  size 5363024236
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a97da7e20ed21c52148b2c6483b85625b2f9a1548b88aab792c65e6bb4d559c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f6bc3b332b1d7b34dd8e7d7ed0389c868155059ddb1d908e9ac3feb6672b23c
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9eb4a21668b5edfedcaeaf648657fd094bc648e6be668a28a363580b1df942e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de5e0c7dadcd828a8d62fffc136e170202022509240a895985c7bc45cabbced
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cb115f60f5dd227aeccffe5a14cb6955351d121ecfaafda0f2b7e3e864724da
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c420d12d8aa09a561480241f19154d4aedd8a866de54ed145d69f860bae6f94
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6db79cebd88fef28a9a7b5fe9a95e92b060fb78ee4e200f6d1d1fd8583e5e5e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec873fd7c31f869e7956f098c0d1e17d2296924b3c55e4971a059dd097690b6f
3
  size 14583
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.747132714363736,
5
- "global_step": 855,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1038,11 +1038,353 @@
1038
  "learning_rate": 5e-05,
1039
  "loss": 0.057,
1040
  "step": 855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1041
  }
1042
  ],
1043
  "max_steps": 1140,
1044
  "num_train_epochs": 5,
1045
- "total_flos": 6.520473573302206e+18,
1046
  "trial_name": null,
1047
  "trial_params": null
1048
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.996176952484981,
5
+ "global_step": 1140,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1038
  "learning_rate": 5e-05,
1039
  "loss": 0.057,
1040
  "step": 855
1041
+ },
1042
+ {
1043
+ "epoch": 3.77,
1044
+ "learning_rate": 5e-05,
1045
+ "loss": 0.0539,
1046
+ "step": 860
1047
+ },
1048
+ {
1049
+ "epoch": 3.79,
1050
+ "learning_rate": 5e-05,
1051
+ "loss": 0.0529,
1052
+ "step": 865
1053
+ },
1054
+ {
1055
+ "epoch": 3.81,
1056
+ "learning_rate": 5e-05,
1057
+ "loss": 0.0552,
1058
+ "step": 870
1059
+ },
1060
+ {
1061
+ "epoch": 3.83,
1062
+ "learning_rate": 5e-05,
1063
+ "loss": 0.0547,
1064
+ "step": 875
1065
+ },
1066
+ {
1067
+ "epoch": 3.86,
1068
+ "learning_rate": 5e-05,
1069
+ "loss": 0.0553,
1070
+ "step": 880
1071
+ },
1072
+ {
1073
+ "epoch": 3.88,
1074
+ "learning_rate": 5e-05,
1075
+ "loss": 0.0558,
1076
+ "step": 885
1077
+ },
1078
+ {
1079
+ "epoch": 3.9,
1080
+ "learning_rate": 5e-05,
1081
+ "loss": 0.054,
1082
+ "step": 890
1083
+ },
1084
+ {
1085
+ "epoch": 3.92,
1086
+ "learning_rate": 5e-05,
1087
+ "loss": 0.0549,
1088
+ "step": 895
1089
+ },
1090
+ {
1091
+ "epoch": 3.94,
1092
+ "learning_rate": 5e-05,
1093
+ "loss": 0.0544,
1094
+ "step": 900
1095
+ },
1096
+ {
1097
+ "epoch": 3.97,
1098
+ "learning_rate": 5e-05,
1099
+ "loss": 0.0558,
1100
+ "step": 905
1101
+ },
1102
+ {
1103
+ "epoch": 3.99,
1104
+ "learning_rate": 5e-05,
1105
+ "loss": 0.0545,
1106
+ "step": 910
1107
+ },
1108
+ {
1109
+ "epoch": 4.01,
1110
+ "learning_rate": 5e-05,
1111
+ "loss": 0.0604,
1112
+ "step": 915
1113
+ },
1114
+ {
1115
+ "epoch": 4.03,
1116
+ "learning_rate": 5e-05,
1117
+ "loss": 0.0497,
1118
+ "step": 920
1119
+ },
1120
+ {
1121
+ "epoch": 4.06,
1122
+ "learning_rate": 5e-05,
1123
+ "loss": 0.049,
1124
+ "step": 925
1125
+ },
1126
+ {
1127
+ "epoch": 4.08,
1128
+ "learning_rate": 5e-05,
1129
+ "loss": 0.0488,
1130
+ "step": 930
1131
+ },
1132
+ {
1133
+ "epoch": 4.1,
1134
+ "learning_rate": 5e-05,
1135
+ "loss": 0.0495,
1136
+ "step": 935
1137
+ },
1138
+ {
1139
+ "epoch": 4.12,
1140
+ "learning_rate": 5e-05,
1141
+ "loss": 0.049,
1142
+ "step": 940
1143
+ },
1144
+ {
1145
+ "epoch": 4.14,
1146
+ "learning_rate": 5e-05,
1147
+ "loss": 0.0502,
1148
+ "step": 945
1149
+ },
1150
+ {
1151
+ "epoch": 4.17,
1152
+ "learning_rate": 5e-05,
1153
+ "loss": 0.0493,
1154
+ "step": 950
1155
+ },
1156
+ {
1157
+ "epoch": 4.19,
1158
+ "learning_rate": 5e-05,
1159
+ "loss": 0.0496,
1160
+ "step": 955
1161
+ },
1162
+ {
1163
+ "epoch": 4.21,
1164
+ "learning_rate": 5e-05,
1165
+ "loss": 0.0475,
1166
+ "step": 960
1167
+ },
1168
+ {
1169
+ "epoch": 4.23,
1170
+ "learning_rate": 5e-05,
1171
+ "loss": 0.0486,
1172
+ "step": 965
1173
+ },
1174
+ {
1175
+ "epoch": 4.25,
1176
+ "learning_rate": 5e-05,
1177
+ "loss": 0.0503,
1178
+ "step": 970
1179
+ },
1180
+ {
1181
+ "epoch": 4.28,
1182
+ "learning_rate": 5e-05,
1183
+ "loss": 0.0508,
1184
+ "step": 975
1185
+ },
1186
+ {
1187
+ "epoch": 4.3,
1188
+ "learning_rate": 5e-05,
1189
+ "loss": 0.0501,
1190
+ "step": 980
1191
+ },
1192
+ {
1193
+ "epoch": 4.32,
1194
+ "learning_rate": 5e-05,
1195
+ "loss": 0.0499,
1196
+ "step": 985
1197
+ },
1198
+ {
1199
+ "epoch": 4.34,
1200
+ "learning_rate": 5e-05,
1201
+ "loss": 0.0485,
1202
+ "step": 990
1203
+ },
1204
+ {
1205
+ "epoch": 4.36,
1206
+ "learning_rate": 5e-05,
1207
+ "loss": 0.0494,
1208
+ "step": 995
1209
+ },
1210
+ {
1211
+ "epoch": 4.38,
1212
+ "learning_rate": 5e-05,
1213
+ "loss": 0.0503,
1214
+ "step": 1000
1215
+ },
1216
+ {
1217
+ "epoch": 4.41,
1218
+ "learning_rate": 5e-05,
1219
+ "loss": 0.0512,
1220
+ "step": 1005
1221
+ },
1222
+ {
1223
+ "epoch": 4.43,
1224
+ "learning_rate": 5e-05,
1225
+ "loss": 0.0513,
1226
+ "step": 1010
1227
+ },
1228
+ {
1229
+ "epoch": 4.45,
1230
+ "learning_rate": 5e-05,
1231
+ "loss": 0.0496,
1232
+ "step": 1015
1233
+ },
1234
+ {
1235
+ "epoch": 4.47,
1236
+ "learning_rate": 5e-05,
1237
+ "loss": 0.0493,
1238
+ "step": 1020
1239
+ },
1240
+ {
1241
+ "epoch": 4.49,
1242
+ "learning_rate": 5e-05,
1243
+ "loss": 0.0516,
1244
+ "step": 1025
1245
+ },
1246
+ {
1247
+ "epoch": 4.52,
1248
+ "learning_rate": 5e-05,
1249
+ "loss": 0.0498,
1250
+ "step": 1030
1251
+ },
1252
+ {
1253
+ "epoch": 4.54,
1254
+ "learning_rate": 5e-05,
1255
+ "loss": 0.0498,
1256
+ "step": 1035
1257
+ },
1258
+ {
1259
+ "epoch": 4.56,
1260
+ "learning_rate": 5e-05,
1261
+ "loss": 0.0491,
1262
+ "step": 1040
1263
+ },
1264
+ {
1265
+ "epoch": 4.58,
1266
+ "learning_rate": 5e-05,
1267
+ "loss": 0.047,
1268
+ "step": 1045
1269
+ },
1270
+ {
1271
+ "epoch": 4.6,
1272
+ "learning_rate": 5e-05,
1273
+ "loss": 0.0493,
1274
+ "step": 1050
1275
+ },
1276
+ {
1277
+ "epoch": 4.62,
1278
+ "learning_rate": 5e-05,
1279
+ "loss": 0.0488,
1280
+ "step": 1055
1281
+ },
1282
+ {
1283
+ "epoch": 4.65,
1284
+ "learning_rate": 5e-05,
1285
+ "loss": 0.0502,
1286
+ "step": 1060
1287
+ },
1288
+ {
1289
+ "epoch": 4.67,
1290
+ "learning_rate": 5e-05,
1291
+ "loss": 0.0511,
1292
+ "step": 1065
1293
+ },
1294
+ {
1295
+ "epoch": 4.69,
1296
+ "learning_rate": 5e-05,
1297
+ "loss": 0.0498,
1298
+ "step": 1070
1299
+ },
1300
+ {
1301
+ "epoch": 4.71,
1302
+ "learning_rate": 5e-05,
1303
+ "loss": 0.0511,
1304
+ "step": 1075
1305
+ },
1306
+ {
1307
+ "epoch": 4.73,
1308
+ "learning_rate": 5e-05,
1309
+ "loss": 0.0498,
1310
+ "step": 1080
1311
+ },
1312
+ {
1313
+ "epoch": 4.76,
1314
+ "learning_rate": 5e-05,
1315
+ "loss": 0.0521,
1316
+ "step": 1085
1317
+ },
1318
+ {
1319
+ "epoch": 4.78,
1320
+ "learning_rate": 5e-05,
1321
+ "loss": 0.0503,
1322
+ "step": 1090
1323
+ },
1324
+ {
1325
+ "epoch": 4.8,
1326
+ "learning_rate": 5e-05,
1327
+ "loss": 0.0509,
1328
+ "step": 1095
1329
+ },
1330
+ {
1331
+ "epoch": 4.82,
1332
+ "learning_rate": 5e-05,
1333
+ "loss": 0.0523,
1334
+ "step": 1100
1335
+ },
1336
+ {
1337
+ "epoch": 4.84,
1338
+ "learning_rate": 5e-05,
1339
+ "loss": 0.0465,
1340
+ "step": 1105
1341
+ },
1342
+ {
1343
+ "epoch": 4.87,
1344
+ "learning_rate": 5e-05,
1345
+ "loss": 0.0521,
1346
+ "step": 1110
1347
+ },
1348
+ {
1349
+ "epoch": 4.89,
1350
+ "learning_rate": 5e-05,
1351
+ "loss": 0.0488,
1352
+ "step": 1115
1353
+ },
1354
+ {
1355
+ "epoch": 4.91,
1356
+ "learning_rate": 5e-05,
1357
+ "loss": 0.0488,
1358
+ "step": 1120
1359
+ },
1360
+ {
1361
+ "epoch": 4.93,
1362
+ "learning_rate": 5e-05,
1363
+ "loss": 0.0502,
1364
+ "step": 1125
1365
+ },
1366
+ {
1367
+ "epoch": 4.95,
1368
+ "learning_rate": 5e-05,
1369
+ "loss": 0.048,
1370
+ "step": 1130
1371
+ },
1372
+ {
1373
+ "epoch": 4.97,
1374
+ "learning_rate": 5e-05,
1375
+ "loss": 0.0497,
1376
+ "step": 1135
1377
+ },
1378
+ {
1379
+ "epoch": 5.0,
1380
+ "learning_rate": 5e-05,
1381
+ "loss": 0.0484,
1382
+ "step": 1140
1383
  }
1384
  ],
1385
  "max_steps": 1140,
1386
  "num_train_epochs": 5,
1387
+ "total_flos": 8.693964764402942e+18,
1388
  "trial_name": null,
1389
  "trial_params": null
1390
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51a52db4c4e75a7734032ea56f6f10e21c679ce9cc10bf8afa9a21d755ad0786
3
  size 5363024236
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d85bffffb2fe97ca10f0460ebc0b029a11e6d606ade9c54de58bcb6de72ec8
3
  size 5363024236