nbasatish commited on
Commit
7418379
1 Parent(s): da4a46a

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +255 -5
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 54.3016,
3
- "best_model_checkpoint": "/uoa/home/s02sd1/Desktop/Project data/pytorch_project/facebook_28.03/checkpoint-43000",
4
- "epoch": 64.88011283497885,
5
- "global_step": 46000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1156,11 +1156,261 @@
1156
  "eval_samples_per_second": 0.062,
1157
  "eval_steps_per_second": 0.003,
1158
  "step": 46000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1159
  }
1160
  ],
1161
  "max_steps": 70900,
1162
  "num_train_epochs": 100,
1163
- "total_flos": 7.747033211322532e+17,
1164
  "trial_name": null,
1165
  "trial_params": null
1166
  }
 
1
  {
2
+ "best_metric": 54.4377,
3
+ "best_model_checkpoint": "/uoa/home/s02sd1/Desktop/Project data/pytorch_project/facebook_28.03/checkpoint-53000",
4
+ "epoch": 78.98448519040903,
5
+ "global_step": 56000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1156
  "eval_samples_per_second": 0.062,
1157
  "eval_steps_per_second": 0.003,
1158
  "step": 46000
1159
+ },
1160
+ {
1161
+ "epoch": 65.59,
1162
+ "learning_rate": 1.0324400564174895e-06,
1163
+ "loss": 0.0157,
1164
+ "step": 46500
1165
+ },
1166
+ {
1167
+ "epoch": 66.29,
1168
+ "learning_rate": 1.011283497884344e-06,
1169
+ "loss": 0.0152,
1170
+ "step": 47000
1171
+ },
1172
+ {
1173
+ "epoch": 66.29,
1174
+ "eval_gen_len": 162.8547,
1175
+ "eval_loss": 1.3345621824264526,
1176
+ "eval_rouge1": 69.2641,
1177
+ "eval_rouge2": 53.4141,
1178
+ "eval_rougeL": 55.3861,
1179
+ "eval_rougeLsum": 59.6768,
1180
+ "eval_runtime": 5757.5178,
1181
+ "eval_samples_per_second": 0.061,
1182
+ "eval_steps_per_second": 0.003,
1183
+ "step": 47000
1184
+ },
1185
+ {
1186
+ "epoch": 67.0,
1187
+ "learning_rate": 9.901269393511988e-07,
1188
+ "loss": 0.0157,
1189
+ "step": 47500
1190
+ },
1191
+ {
1192
+ "epoch": 67.7,
1193
+ "learning_rate": 9.689703808180537e-07,
1194
+ "loss": 0.0149,
1195
+ "step": 48000
1196
+ },
1197
+ {
1198
+ "epoch": 67.7,
1199
+ "eval_gen_len": 163.0684,
1200
+ "eval_loss": 1.3516221046447754,
1201
+ "eval_rouge1": 69.3874,
1202
+ "eval_rouge2": 53.707,
1203
+ "eval_rougeL": 56.0318,
1204
+ "eval_rougeLsum": 60.1116,
1205
+ "eval_runtime": 5778.0873,
1206
+ "eval_samples_per_second": 0.061,
1207
+ "eval_steps_per_second": 0.003,
1208
+ "step": 48000
1209
+ },
1210
+ {
1211
+ "epoch": 68.41,
1212
+ "learning_rate": 9.478138222849083e-07,
1213
+ "loss": 0.014,
1214
+ "step": 48500
1215
+ },
1216
+ {
1217
+ "epoch": 69.11,
1218
+ "learning_rate": 9.266572637517632e-07,
1219
+ "loss": 0.0143,
1220
+ "step": 49000
1221
+ },
1222
+ {
1223
+ "epoch": 69.11,
1224
+ "eval_gen_len": 163.4387,
1225
+ "eval_loss": 1.3576686382293701,
1226
+ "eval_rouge1": 69.5713,
1227
+ "eval_rouge2": 53.8362,
1228
+ "eval_rougeL": 55.9625,
1229
+ "eval_rougeLsum": 60.2185,
1230
+ "eval_runtime": 6313.8347,
1231
+ "eval_samples_per_second": 0.056,
1232
+ "eval_steps_per_second": 0.003,
1233
+ "step": 49000
1234
+ },
1235
+ {
1236
+ "epoch": 69.82,
1237
+ "learning_rate": 9.055007052186177e-07,
1238
+ "loss": 0.0141,
1239
+ "step": 49500
1240
+ },
1241
+ {
1242
+ "epoch": 70.52,
1243
+ "learning_rate": 8.843441466854725e-07,
1244
+ "loss": 0.0139,
1245
+ "step": 50000
1246
+ },
1247
+ {
1248
+ "epoch": 70.52,
1249
+ "eval_gen_len": 161.9259,
1250
+ "eval_loss": 1.346762776374817,
1251
+ "eval_rouge1": 69.5609,
1252
+ "eval_rouge2": 53.9709,
1253
+ "eval_rougeL": 56.0658,
1254
+ "eval_rougeLsum": 60.0989,
1255
+ "eval_runtime": 5992.2258,
1256
+ "eval_samples_per_second": 0.059,
1257
+ "eval_steps_per_second": 0.003,
1258
+ "step": 50000
1259
+ },
1260
+ {
1261
+ "epoch": 71.23,
1262
+ "learning_rate": 8.631875881523273e-07,
1263
+ "loss": 0.0136,
1264
+ "step": 50500
1265
+ },
1266
+ {
1267
+ "epoch": 71.93,
1268
+ "learning_rate": 8.42031029619182e-07,
1269
+ "loss": 0.0132,
1270
+ "step": 51000
1271
+ },
1272
+ {
1273
+ "epoch": 71.93,
1274
+ "eval_gen_len": 161.208,
1275
+ "eval_loss": 1.3650970458984375,
1276
+ "eval_rouge1": 69.5418,
1277
+ "eval_rouge2": 53.7154,
1278
+ "eval_rougeL": 55.7361,
1279
+ "eval_rougeLsum": 60.0613,
1280
+ "eval_runtime": 7093.4012,
1281
+ "eval_samples_per_second": 0.049,
1282
+ "eval_steps_per_second": 0.003,
1283
+ "step": 51000
1284
+ },
1285
+ {
1286
+ "epoch": 72.64,
1287
+ "learning_rate": 8.208744710860367e-07,
1288
+ "loss": 0.0131,
1289
+ "step": 51500
1290
+ },
1291
+ {
1292
+ "epoch": 73.34,
1293
+ "learning_rate": 7.997179125528913e-07,
1294
+ "loss": 0.0127,
1295
+ "step": 52000
1296
+ },
1297
+ {
1298
+ "epoch": 73.34,
1299
+ "eval_gen_len": 162.2849,
1300
+ "eval_loss": 1.3561229705810547,
1301
+ "eval_rouge1": 69.4318,
1302
+ "eval_rouge2": 53.5887,
1303
+ "eval_rougeL": 55.4159,
1304
+ "eval_rougeLsum": 59.6162,
1305
+ "eval_runtime": 6162.4321,
1306
+ "eval_samples_per_second": 0.057,
1307
+ "eval_steps_per_second": 0.003,
1308
+ "step": 52000
1309
+ },
1310
+ {
1311
+ "epoch": 74.05,
1312
+ "learning_rate": 7.785613540197461e-07,
1313
+ "loss": 0.0127,
1314
+ "step": 52500
1315
+ },
1316
+ {
1317
+ "epoch": 74.75,
1318
+ "learning_rate": 7.574047954866008e-07,
1319
+ "loss": 0.0123,
1320
+ "step": 53000
1321
+ },
1322
+ {
1323
+ "epoch": 74.75,
1324
+ "eval_gen_len": 159.567,
1325
+ "eval_loss": 1.3640151023864746,
1326
+ "eval_rouge1": 69.9555,
1327
+ "eval_rouge2": 54.4377,
1328
+ "eval_rougeL": 56.2914,
1329
+ "eval_rougeLsum": 60.5544,
1330
+ "eval_runtime": 6099.51,
1331
+ "eval_samples_per_second": 0.058,
1332
+ "eval_steps_per_second": 0.003,
1333
+ "step": 53000
1334
+ },
1335
+ {
1336
+ "epoch": 75.46,
1337
+ "learning_rate": 7.362482369534556e-07,
1338
+ "loss": 0.0123,
1339
+ "step": 53500
1340
+ },
1341
+ {
1342
+ "epoch": 76.16,
1343
+ "learning_rate": 7.150916784203103e-07,
1344
+ "loss": 0.0119,
1345
+ "step": 54000
1346
+ },
1347
+ {
1348
+ "epoch": 76.16,
1349
+ "eval_gen_len": 162.0228,
1350
+ "eval_loss": 1.3737815618515015,
1351
+ "eval_rouge1": 69.6472,
1352
+ "eval_rouge2": 53.9313,
1353
+ "eval_rougeL": 55.9586,
1354
+ "eval_rougeLsum": 59.9692,
1355
+ "eval_runtime": 6462.5491,
1356
+ "eval_samples_per_second": 0.054,
1357
+ "eval_steps_per_second": 0.003,
1358
+ "step": 54000
1359
+ },
1360
+ {
1361
+ "epoch": 76.87,
1362
+ "learning_rate": 6.93935119887165e-07,
1363
+ "loss": 0.0118,
1364
+ "step": 54500
1365
+ },
1366
+ {
1367
+ "epoch": 77.57,
1368
+ "learning_rate": 6.727785613540198e-07,
1369
+ "loss": 0.0117,
1370
+ "step": 55000
1371
+ },
1372
+ {
1373
+ "epoch": 77.57,
1374
+ "eval_gen_len": 162.3789,
1375
+ "eval_loss": 1.3787622451782227,
1376
+ "eval_rouge1": 69.5463,
1377
+ "eval_rouge2": 53.7206,
1378
+ "eval_rougeL": 56.0721,
1379
+ "eval_rougeLsum": 60.3372,
1380
+ "eval_runtime": 6234.7587,
1381
+ "eval_samples_per_second": 0.056,
1382
+ "eval_steps_per_second": 0.003,
1383
+ "step": 55000
1384
+ },
1385
+ {
1386
+ "epoch": 78.28,
1387
+ "learning_rate": 6.516220028208745e-07,
1388
+ "loss": 0.0114,
1389
+ "step": 55500
1390
+ },
1391
+ {
1392
+ "epoch": 78.98,
1393
+ "learning_rate": 6.304654442877292e-07,
1394
+ "loss": 0.0114,
1395
+ "step": 56000
1396
+ },
1397
+ {
1398
+ "epoch": 78.98,
1399
+ "eval_gen_len": 162.1083,
1400
+ "eval_loss": 1.3712390661239624,
1401
+ "eval_rouge1": 69.5879,
1402
+ "eval_rouge2": 53.9682,
1403
+ "eval_rougeL": 56.2965,
1404
+ "eval_rougeLsum": 60.5547,
1405
+ "eval_runtime": 6229.6909,
1406
+ "eval_samples_per_second": 0.056,
1407
+ "eval_steps_per_second": 0.003,
1408
+ "step": 56000
1409
  }
1410
  ],
1411
  "max_steps": 70900,
1412
  "num_train_epochs": 100,
1413
+ "total_flos": 9.43193629959635e+17,
1414
  "trial_name": null,
1415
  "trial_params": null
1416
  }