joelniklaus commited on
Commit
1ee3756
1 Parent(s): 80f58b1

Training in progress, step 250000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b7d49319544a72931f7ee076c508b1a724907fad83b5a899a60bdac334a62f5
3
  size 2693742553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52e04ca16da996f0e0755c7bb47cbdc9c3ec1b41a56b0b2abba36be8a2834caa
3
  size 2693742553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc050b6b6cea13715c082c4efde1df4eb7f489a44d07d86d55ddcf0803ea7d10
3
  size 1346893675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4632b32f5bbe6941ef0b01b4af76b44c6852cf11ba1c74c93c8b0bc13e36cb68
3
  size 1346893675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9eaaa7cc088403f9a31ab70b2e1791d125e5bf39dbc05085cce4adba73595
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f96e135872bf2c130b0a7670bbcf66b62845d2914648d795677f705c0fbf7e1
3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1ccf49f4804619cd7d22b74b595a694a368e629a10492b4089d6536d07bdf2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7924e9d3f9ed054868d3ddaa60025f26707d231e7eacc5684e8550acfee9e9c0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2,
5
- "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1238,11 +1238,319 @@
1238
  "eval_samples_per_second": 206.878,
1239
  "eval_steps_per_second": 3.269,
1240
  "step": 200000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1241
  }
1242
  ],
1243
  "max_steps": 1000000,
1244
  "num_train_epochs": 9223372036854775807,
1245
- "total_flos": 1.19299796631552e+19,
1246
  "trial_name": null,
1247
  "trial_params": null
1248
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.25,
5
+ "global_step": 250000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1238
  "eval_samples_per_second": 206.878,
1239
  "eval_steps_per_second": 3.269,
1240
  "step": 200000
1241
+ },
1242
+ {
1243
+ "epoch": 0.2,
1244
+ "learning_rate": 9.389475079423988e-05,
1245
+ "loss": 0.8939,
1246
+ "step": 201000
1247
+ },
1248
+ {
1249
+ "epoch": 0.2,
1250
+ "learning_rate": 9.381533400219318e-05,
1251
+ "loss": 0.9112,
1252
+ "step": 202000
1253
+ },
1254
+ {
1255
+ "epoch": 0.2,
1256
+ "learning_rate": 9.373543805267368e-05,
1257
+ "loss": 0.8656,
1258
+ "step": 203000
1259
+ },
1260
+ {
1261
+ "epoch": 0.2,
1262
+ "learning_rate": 9.365506381941066e-05,
1263
+ "loss": 0.852,
1264
+ "step": 204000
1265
+ },
1266
+ {
1267
+ "epoch": 0.2,
1268
+ "learning_rate": 9.357421218136386e-05,
1269
+ "loss": 0.8976,
1270
+ "step": 205000
1271
+ },
1272
+ {
1273
+ "epoch": 0.21,
1274
+ "learning_rate": 9.349288402271388e-05,
1275
+ "loss": 0.6997,
1276
+ "step": 206000
1277
+ },
1278
+ {
1279
+ "epoch": 0.21,
1280
+ "learning_rate": 9.341108023285238e-05,
1281
+ "loss": 0.8927,
1282
+ "step": 207000
1283
+ },
1284
+ {
1285
+ "epoch": 0.21,
1286
+ "learning_rate": 9.332880170637252e-05,
1287
+ "loss": 0.8809,
1288
+ "step": 208000
1289
+ },
1290
+ {
1291
+ "epoch": 0.21,
1292
+ "learning_rate": 9.32460493430591e-05,
1293
+ "loss": 0.9284,
1294
+ "step": 209000
1295
+ },
1296
+ {
1297
+ "epoch": 0.21,
1298
+ "learning_rate": 9.316282404787871e-05,
1299
+ "loss": 1.0261,
1300
+ "step": 210000
1301
+ },
1302
+ {
1303
+ "epoch": 0.21,
1304
+ "learning_rate": 9.30791267309698e-05,
1305
+ "loss": 1.1077,
1306
+ "step": 211000
1307
+ },
1308
+ {
1309
+ "epoch": 0.21,
1310
+ "learning_rate": 9.299495830763286e-05,
1311
+ "loss": 1.0381,
1312
+ "step": 212000
1313
+ },
1314
+ {
1315
+ "epoch": 0.21,
1316
+ "learning_rate": 9.291031969832026e-05,
1317
+ "loss": 0.9225,
1318
+ "step": 213000
1319
+ },
1320
+ {
1321
+ "epoch": 0.21,
1322
+ "learning_rate": 9.282521182862629e-05,
1323
+ "loss": 1.0013,
1324
+ "step": 214000
1325
+ },
1326
+ {
1327
+ "epoch": 0.21,
1328
+ "learning_rate": 9.273963562927695e-05,
1329
+ "loss": 0.8487,
1330
+ "step": 215000
1331
+ },
1332
+ {
1333
+ "epoch": 0.22,
1334
+ "learning_rate": 9.265359203611987e-05,
1335
+ "loss": 0.9014,
1336
+ "step": 216000
1337
+ },
1338
+ {
1339
+ "epoch": 0.22,
1340
+ "learning_rate": 9.256708199011401e-05,
1341
+ "loss": 0.9586,
1342
+ "step": 217000
1343
+ },
1344
+ {
1345
+ "epoch": 0.22,
1346
+ "learning_rate": 9.248010643731935e-05,
1347
+ "loss": 0.8322,
1348
+ "step": 218000
1349
+ },
1350
+ {
1351
+ "epoch": 0.22,
1352
+ "learning_rate": 9.239266632888659e-05,
1353
+ "loss": 0.8878,
1354
+ "step": 219000
1355
+ },
1356
+ {
1357
+ "epoch": 0.22,
1358
+ "learning_rate": 9.230476262104677e-05,
1359
+ "loss": 0.8508,
1360
+ "step": 220000
1361
+ },
1362
+ {
1363
+ "epoch": 0.22,
1364
+ "learning_rate": 9.221639627510076e-05,
1365
+ "loss": 0.8793,
1366
+ "step": 221000
1367
+ },
1368
+ {
1369
+ "epoch": 0.22,
1370
+ "learning_rate": 9.212756825740873e-05,
1371
+ "loss": 0.8577,
1372
+ "step": 222000
1373
+ },
1374
+ {
1375
+ "epoch": 0.22,
1376
+ "learning_rate": 9.20382795393797e-05,
1377
+ "loss": 0.9545,
1378
+ "step": 223000
1379
+ },
1380
+ {
1381
+ "epoch": 0.22,
1382
+ "learning_rate": 9.194853109746074e-05,
1383
+ "loss": 1.0083,
1384
+ "step": 224000
1385
+ },
1386
+ {
1387
+ "epoch": 0.23,
1388
+ "learning_rate": 9.185832391312644e-05,
1389
+ "loss": 1.0004,
1390
+ "step": 225000
1391
+ },
1392
+ {
1393
+ "epoch": 0.23,
1394
+ "learning_rate": 9.176765897286813e-05,
1395
+ "loss": 0.8553,
1396
+ "step": 226000
1397
+ },
1398
+ {
1399
+ "epoch": 0.23,
1400
+ "learning_rate": 9.167653726818305e-05,
1401
+ "loss": 0.9164,
1402
+ "step": 227000
1403
+ },
1404
+ {
1405
+ "epoch": 0.23,
1406
+ "learning_rate": 9.158495979556358e-05,
1407
+ "loss": 0.8879,
1408
+ "step": 228000
1409
+ },
1410
+ {
1411
+ "epoch": 0.23,
1412
+ "learning_rate": 9.14929275564863e-05,
1413
+ "loss": 0.9265,
1414
+ "step": 229000
1415
+ },
1416
+ {
1417
+ "epoch": 0.23,
1418
+ "learning_rate": 9.140044155740101e-05,
1419
+ "loss": 1.0602,
1420
+ "step": 230000
1421
+ },
1422
+ {
1423
+ "epoch": 0.23,
1424
+ "learning_rate": 9.130750280971978e-05,
1425
+ "loss": 1.0829,
1426
+ "step": 231000
1427
+ },
1428
+ {
1429
+ "epoch": 0.23,
1430
+ "learning_rate": 9.121411232980588e-05,
1431
+ "loss": 0.9123,
1432
+ "step": 232000
1433
+ },
1434
+ {
1435
+ "epoch": 0.23,
1436
+ "learning_rate": 9.112027113896262e-05,
1437
+ "loss": 0.8953,
1438
+ "step": 233000
1439
+ },
1440
+ {
1441
+ "epoch": 0.23,
1442
+ "learning_rate": 9.102598026342222e-05,
1443
+ "loss": 1.0354,
1444
+ "step": 234000
1445
+ },
1446
+ {
1447
+ "epoch": 0.23,
1448
+ "learning_rate": 9.093124073433463e-05,
1449
+ "loss": 0.6453,
1450
+ "step": 235000
1451
+ },
1452
+ {
1453
+ "epoch": 0.24,
1454
+ "learning_rate": 9.083605358775612e-05,
1455
+ "loss": 0.6108,
1456
+ "step": 236000
1457
+ },
1458
+ {
1459
+ "epoch": 0.24,
1460
+ "learning_rate": 9.074041986463808e-05,
1461
+ "loss": 0.7116,
1462
+ "step": 237000
1463
+ },
1464
+ {
1465
+ "epoch": 0.24,
1466
+ "learning_rate": 9.064434061081562e-05,
1467
+ "loss": 0.9821,
1468
+ "step": 238000
1469
+ },
1470
+ {
1471
+ "epoch": 0.24,
1472
+ "learning_rate": 9.0547816876996e-05,
1473
+ "loss": 0.8962,
1474
+ "step": 239000
1475
+ },
1476
+ {
1477
+ "epoch": 0.24,
1478
+ "learning_rate": 9.045084971874738e-05,
1479
+ "loss": 0.8552,
1480
+ "step": 240000
1481
+ },
1482
+ {
1483
+ "epoch": 0.24,
1484
+ "learning_rate": 9.035344019648702e-05,
1485
+ "loss": 0.9197,
1486
+ "step": 241000
1487
+ },
1488
+ {
1489
+ "epoch": 0.24,
1490
+ "learning_rate": 9.025558937546988e-05,
1491
+ "loss": 0.9253,
1492
+ "step": 242000
1493
+ },
1494
+ {
1495
+ "epoch": 0.24,
1496
+ "learning_rate": 9.015729832577681e-05,
1497
+ "loss": 0.8837,
1498
+ "step": 243000
1499
+ },
1500
+ {
1501
+ "epoch": 0.24,
1502
+ "learning_rate": 9.005856812230304e-05,
1503
+ "loss": 0.8883,
1504
+ "step": 244000
1505
+ },
1506
+ {
1507
+ "epoch": 0.24,
1508
+ "learning_rate": 8.995939984474624e-05,
1509
+ "loss": 0.9024,
1510
+ "step": 245000
1511
+ },
1512
+ {
1513
+ "epoch": 0.25,
1514
+ "learning_rate": 8.98597945775948e-05,
1515
+ "loss": 0.899,
1516
+ "step": 246000
1517
+ },
1518
+ {
1519
+ "epoch": 0.25,
1520
+ "learning_rate": 8.975975341011596e-05,
1521
+ "loss": 0.9336,
1522
+ "step": 247000
1523
+ },
1524
+ {
1525
+ "epoch": 0.25,
1526
+ "learning_rate": 8.965927743634391e-05,
1527
+ "loss": 0.8503,
1528
+ "step": 248000
1529
+ },
1530
+ {
1531
+ "epoch": 0.25,
1532
+ "learning_rate": 8.955836775506776e-05,
1533
+ "loss": 0.9448,
1534
+ "step": 249000
1535
+ },
1536
+ {
1537
+ "epoch": 0.25,
1538
+ "learning_rate": 8.945702546981969e-05,
1539
+ "loss": 0.8713,
1540
+ "step": 250000
1541
+ },
1542
+ {
1543
+ "epoch": 0.25,
1544
+ "eval_loss": 0.6120243072509766,
1545
+ "eval_runtime": 24.3453,
1546
+ "eval_samples_per_second": 205.378,
1547
+ "eval_steps_per_second": 3.245,
1548
+ "step": 250000
1549
  }
1550
  ],
1551
  "max_steps": 1000000,
1552
  "num_train_epochs": 9223372036854775807,
1553
+ "total_flos": 1.4912474578944e+19,
1554
  "trial_name": null,
1555
  "trial_params": null
1556
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc050b6b6cea13715c082c4efde1df4eb7f489a44d07d86d55ddcf0803ea7d10
3
  size 1346893675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4632b32f5bbe6941ef0b01b4af76b44c6852cf11ba1c74c93c8b0bc13e36cb68
3
  size 1346893675
runs/Feb25_19-21-23_t1v-n-eeadb94b-w-0/events.out.tfevents.1677352917.t1v-n-eeadb94b-w-0.615717.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2516320f139f5ebb64cdc6c31161aa5945f0ee016de07dcd90c6f7d4a8007155
3
- size 28664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394663698a9429b74f6870c31cb8b4ec916b7745904062f586cb9d6faf95fc7f
3
+ size 36940