hariniiiiiiiiii commited on
Commit
5c972c9
1 Parent(s): c7b0544

Training in progress, step 2000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67681c79cd214ed9a8ee42c881c73d19f1c39d67a78279d301dd38994ad30568
3
  size 4115013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3a3e48a26a2d622517197ff24d94300acf16b986c0e194767a2c3e5fb70890
3
  size 4115013
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1d3b0d701f16fe2a1ac911c75f337bfda79fe2f7f898cd1baabb210413c7dba
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4b616d5ea5f92c203d471e43b622d1302d66f27b9105c5655030fdf5c7e986
3
  size 2329702453
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f08b7042b3265d650ddd09433dd030c04a52d87e147dd0a8cbaf2372dce6fce
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e491699f12f10fb03cc72b55406e2697d49735bc8f126f7938c3aea820b10f54
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08a9482e48e6ef3973131ed0b8d44170a3c271bf2d7cf6a402ee43fb89e77ae
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705762f34c23f694831c26f3540c14aaf4399a434031642b13285a5bcfd2e85c
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.962643323881149,
5
- "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1086,11 +1086,371 @@
1086
  "eval_samples_per_second": 0.23,
1087
  "eval_steps_per_second": 0.23,
1088
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1089
  }
1090
  ],
1091
  "max_steps": 3542,
1092
  "num_train_epochs": 7,
1093
- "total_flos": 3.644099786288333e+16,
1094
  "trial_name": null,
1095
  "trial_params": null
1096
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.95080754530884,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1086
  "eval_samples_per_second": 0.23,
1087
  "eval_steps_per_second": 0.23,
1088
  "step": 1500
1089
+ },
1090
+ {
1091
+ "epoch": 2.98,
1092
+ "learning_rate": 0.0002943221320973349,
1093
+ "loss": 0.1245,
1094
+ "step": 1510
1095
+ },
1096
+ {
1097
+ "epoch": 3.0,
1098
+ "learning_rate": 0.00029287369640787947,
1099
+ "loss": 0.1917,
1100
+ "step": 1520
1101
+ },
1102
+ {
1103
+ "epoch": 3.02,
1104
+ "learning_rate": 0.0002914252607184241,
1105
+ "loss": 0.205,
1106
+ "step": 1530
1107
+ },
1108
+ {
1109
+ "epoch": 3.04,
1110
+ "learning_rate": 0.0002899768250289687,
1111
+ "loss": 0.1493,
1112
+ "step": 1540
1113
+ },
1114
+ {
1115
+ "epoch": 3.06,
1116
+ "learning_rate": 0.0002885283893395133,
1117
+ "loss": 0.1596,
1118
+ "step": 1550
1119
+ },
1120
+ {
1121
+ "epoch": 3.08,
1122
+ "learning_rate": 0.00028707995365005794,
1123
+ "loss": 0.1689,
1124
+ "step": 1560
1125
+ },
1126
+ {
1127
+ "epoch": 3.1,
1128
+ "learning_rate": 0.0002856315179606025,
1129
+ "loss": 0.1371,
1130
+ "step": 1570
1131
+ },
1132
+ {
1133
+ "epoch": 3.12,
1134
+ "learning_rate": 0.0002841830822711471,
1135
+ "loss": 0.1676,
1136
+ "step": 1580
1137
+ },
1138
+ {
1139
+ "epoch": 3.14,
1140
+ "learning_rate": 0.0002827346465816918,
1141
+ "loss": 0.1441,
1142
+ "step": 1590
1143
+ },
1144
+ {
1145
+ "epoch": 3.16,
1146
+ "learning_rate": 0.0002812862108922364,
1147
+ "loss": 0.1436,
1148
+ "step": 1600
1149
+ },
1150
+ {
1151
+ "epoch": 3.16,
1152
+ "eval_loss": 0.8872199058532715,
1153
+ "eval_rouge1": 0.16825396825396824,
1154
+ "eval_rouge2": 0.05833333333333333,
1155
+ "eval_rougeL": 0.1650793650793651,
1156
+ "eval_rougeLsum": 0.17285714285714288,
1157
+ "eval_runtime": 83.5131,
1158
+ "eval_samples_per_second": 0.239,
1159
+ "eval_steps_per_second": 0.239,
1160
+ "step": 1600
1161
+ },
1162
+ {
1163
+ "epoch": 3.18,
1164
+ "learning_rate": 0.000279837775202781,
1165
+ "loss": 0.2173,
1166
+ "step": 1610
1167
+ },
1168
+ {
1169
+ "epoch": 3.2,
1170
+ "learning_rate": 0.00027838933951332564,
1171
+ "loss": 0.1457,
1172
+ "step": 1620
1173
+ },
1174
+ {
1175
+ "epoch": 3.22,
1176
+ "learning_rate": 0.0002769409038238702,
1177
+ "loss": 0.1503,
1178
+ "step": 1630
1179
+ },
1180
+ {
1181
+ "epoch": 3.24,
1182
+ "learning_rate": 0.00027549246813441487,
1183
+ "loss": 0.1371,
1184
+ "step": 1640
1185
+ },
1186
+ {
1187
+ "epoch": 3.26,
1188
+ "learning_rate": 0.00027404403244495946,
1189
+ "loss": 0.1331,
1190
+ "step": 1650
1191
+ },
1192
+ {
1193
+ "epoch": 3.28,
1194
+ "learning_rate": 0.00027259559675550405,
1195
+ "loss": 0.2068,
1196
+ "step": 1660
1197
+ },
1198
+ {
1199
+ "epoch": 3.3,
1200
+ "learning_rate": 0.0002711471610660487,
1201
+ "loss": 0.2001,
1202
+ "step": 1670
1203
+ },
1204
+ {
1205
+ "epoch": 3.32,
1206
+ "learning_rate": 0.0002696987253765933,
1207
+ "loss": 0.177,
1208
+ "step": 1680
1209
+ },
1210
+ {
1211
+ "epoch": 3.34,
1212
+ "learning_rate": 0.00026825028968713787,
1213
+ "loss": 0.1772,
1214
+ "step": 1690
1215
+ },
1216
+ {
1217
+ "epoch": 3.36,
1218
+ "learning_rate": 0.0002668018539976825,
1219
+ "loss": 0.138,
1220
+ "step": 1700
1221
+ },
1222
+ {
1223
+ "epoch": 3.36,
1224
+ "eval_loss": 0.8929020762443542,
1225
+ "eval_rouge1": 0.22999999999999998,
1226
+ "eval_rouge2": 0.12491883116883117,
1227
+ "eval_rougeL": 0.22615384615384615,
1228
+ "eval_rougeLsum": 0.23115384615384618,
1229
+ "eval_runtime": 86.6494,
1230
+ "eval_samples_per_second": 0.231,
1231
+ "eval_steps_per_second": 0.231,
1232
+ "step": 1700
1233
+ },
1234
+ {
1235
+ "epoch": 3.38,
1236
+ "learning_rate": 0.0002653534183082271,
1237
+ "loss": 0.164,
1238
+ "step": 1710
1239
+ },
1240
+ {
1241
+ "epoch": 3.4,
1242
+ "learning_rate": 0.0002639049826187717,
1243
+ "loss": 0.1249,
1244
+ "step": 1720
1245
+ },
1246
+ {
1247
+ "epoch": 3.42,
1248
+ "learning_rate": 0.00026245654692931634,
1249
+ "loss": 0.1356,
1250
+ "step": 1730
1251
+ },
1252
+ {
1253
+ "epoch": 3.44,
1254
+ "learning_rate": 0.000261008111239861,
1255
+ "loss": 0.1374,
1256
+ "step": 1740
1257
+ },
1258
+ {
1259
+ "epoch": 3.46,
1260
+ "learning_rate": 0.00025955967555040557,
1261
+ "loss": 0.2013,
1262
+ "step": 1750
1263
+ },
1264
+ {
1265
+ "epoch": 3.48,
1266
+ "learning_rate": 0.0002581112398609502,
1267
+ "loss": 0.1337,
1268
+ "step": 1760
1269
+ },
1270
+ {
1271
+ "epoch": 3.5,
1272
+ "learning_rate": 0.0002566628041714948,
1273
+ "loss": 0.1226,
1274
+ "step": 1770
1275
+ },
1276
+ {
1277
+ "epoch": 3.52,
1278
+ "learning_rate": 0.0002552143684820394,
1279
+ "loss": 0.1166,
1280
+ "step": 1780
1281
+ },
1282
+ {
1283
+ "epoch": 3.54,
1284
+ "learning_rate": 0.00025376593279258404,
1285
+ "loss": 0.2308,
1286
+ "step": 1790
1287
+ },
1288
+ {
1289
+ "epoch": 3.56,
1290
+ "learning_rate": 0.00025231749710312863,
1291
+ "loss": 0.1265,
1292
+ "step": 1800
1293
+ },
1294
+ {
1295
+ "epoch": 3.56,
1296
+ "eval_loss": 0.9203845858573914,
1297
+ "eval_rouge1": 0.17454545454545453,
1298
+ "eval_rouge2": 0.07291666666666667,
1299
+ "eval_rougeL": 0.16999999999999998,
1300
+ "eval_rougeLsum": 0.17727272727272728,
1301
+ "eval_runtime": 87.9704,
1302
+ "eval_samples_per_second": 0.227,
1303
+ "eval_steps_per_second": 0.227,
1304
+ "step": 1800
1305
+ },
1306
+ {
1307
+ "epoch": 3.58,
1308
+ "learning_rate": 0.0002508690614136732,
1309
+ "loss": 0.1526,
1310
+ "step": 1810
1311
+ },
1312
+ {
1313
+ "epoch": 3.6,
1314
+ "learning_rate": 0.00024942062572421786,
1315
+ "loss": 0.2201,
1316
+ "step": 1820
1317
+ },
1318
+ {
1319
+ "epoch": 3.62,
1320
+ "learning_rate": 0.00024797219003476245,
1321
+ "loss": 0.1271,
1322
+ "step": 1830
1323
+ },
1324
+ {
1325
+ "epoch": 3.64,
1326
+ "learning_rate": 0.0002465237543453071,
1327
+ "loss": 0.1749,
1328
+ "step": 1840
1329
+ },
1330
+ {
1331
+ "epoch": 3.65,
1332
+ "learning_rate": 0.0002450753186558517,
1333
+ "loss": 0.133,
1334
+ "step": 1850
1335
+ },
1336
+ {
1337
+ "epoch": 3.67,
1338
+ "learning_rate": 0.0002436268829663963,
1339
+ "loss": 0.2259,
1340
+ "step": 1860
1341
+ },
1342
+ {
1343
+ "epoch": 3.69,
1344
+ "learning_rate": 0.00024217844727694092,
1345
+ "loss": 0.1549,
1346
+ "step": 1870
1347
+ },
1348
+ {
1349
+ "epoch": 3.71,
1350
+ "learning_rate": 0.00024073001158748554,
1351
+ "loss": 0.1173,
1352
+ "step": 1880
1353
+ },
1354
+ {
1355
+ "epoch": 3.73,
1356
+ "learning_rate": 0.00023928157589803013,
1357
+ "loss": 0.1337,
1358
+ "step": 1890
1359
+ },
1360
+ {
1361
+ "epoch": 3.75,
1362
+ "learning_rate": 0.00023783314020857474,
1363
+ "loss": 0.1828,
1364
+ "step": 1900
1365
+ },
1366
+ {
1367
+ "epoch": 3.75,
1368
+ "eval_loss": 0.9094276428222656,
1369
+ "eval_rouge1": 0.18,
1370
+ "eval_rouge2": 0.14886363636363636,
1371
+ "eval_rougeL": 0.18,
1372
+ "eval_rougeLsum": 0.18615384615384614,
1373
+ "eval_runtime": 84.5106,
1374
+ "eval_samples_per_second": 0.237,
1375
+ "eval_steps_per_second": 0.237,
1376
+ "step": 1900
1377
+ },
1378
+ {
1379
+ "epoch": 3.77,
1380
+ "learning_rate": 0.00023638470451911936,
1381
+ "loss": 0.1821,
1382
+ "step": 1910
1383
+ },
1384
+ {
1385
+ "epoch": 3.79,
1386
+ "learning_rate": 0.00023493626882966395,
1387
+ "loss": 0.1257,
1388
+ "step": 1920
1389
+ },
1390
+ {
1391
+ "epoch": 3.81,
1392
+ "learning_rate": 0.00023348783314020857,
1393
+ "loss": 0.172,
1394
+ "step": 1930
1395
+ },
1396
+ {
1397
+ "epoch": 3.83,
1398
+ "learning_rate": 0.0002320393974507532,
1399
+ "loss": 0.1833,
1400
+ "step": 1940
1401
+ },
1402
+ {
1403
+ "epoch": 3.85,
1404
+ "learning_rate": 0.0002305909617612978,
1405
+ "loss": 0.1334,
1406
+ "step": 1950
1407
+ },
1408
+ {
1409
+ "epoch": 3.87,
1410
+ "learning_rate": 0.00022914252607184242,
1411
+ "loss": 0.1736,
1412
+ "step": 1960
1413
+ },
1414
+ {
1415
+ "epoch": 3.89,
1416
+ "learning_rate": 0.00022769409038238703,
1417
+ "loss": 0.1163,
1418
+ "step": 1970
1419
+ },
1420
+ {
1421
+ "epoch": 3.91,
1422
+ "learning_rate": 0.00022624565469293165,
1423
+ "loss": 0.1844,
1424
+ "step": 1980
1425
+ },
1426
+ {
1427
+ "epoch": 3.93,
1428
+ "learning_rate": 0.00022479721900347624,
1429
+ "loss": 0.1358,
1430
+ "step": 1990
1431
+ },
1432
+ {
1433
+ "epoch": 3.95,
1434
+ "learning_rate": 0.00022334878331402086,
1435
+ "loss": 0.1447,
1436
+ "step": 2000
1437
+ },
1438
+ {
1439
+ "epoch": 3.95,
1440
+ "eval_loss": 0.89415442943573,
1441
+ "eval_rouge1": 0.19,
1442
+ "eval_rouge2": 0.09886363636363635,
1443
+ "eval_rougeL": 0.18615384615384617,
1444
+ "eval_rougeLsum": 0.19615384615384615,
1445
+ "eval_runtime": 84.0506,
1446
+ "eval_samples_per_second": 0.238,
1447
+ "eval_steps_per_second": 0.238,
1448
+ "step": 2000
1449
  }
1450
  ],
1451
  "max_steps": 3542,
1452
  "num_train_epochs": 7,
1453
+ "total_flos": 4.862361100638413e+16,
1454
  "trial_name": null,
1455
  "trial_params": null
1456
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1d3b0d701f16fe2a1ac911c75f337bfda79fe2f7f898cd1baabb210413c7dba
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4b616d5ea5f92c203d471e43b622d1302d66f27b9105c5655030fdf5c7e986
3
  size 2329702453
runs/Feb08_07-03-50_74bc69b4becb/events.out.tfevents.1675839843.74bc69b4becb.290.8 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8194d28430976b9d808ff3690e18217a68d80fd2f3d19f3bcfe785482800df6
3
- size 24674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89707d0ed5076499409b04d8894868bee0b330defbf8965e5c00268b2a46dfb4
3
+ size 34894