Federic commited on
Commit
b2e133e
1 Parent(s): ac03ccd

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:674226647c0af0371804ab2d68e3d707974b6551b87819cd9b51eaf76d96623d
3
  size 1822093912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae2eec5244c12cfc23d97a9e2c8e5f98d6a441ed54e5868a87af066796e63394
3
  size 1822093912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed5cf37ce34fe8faf97f2f26392f8fe21ffa3a7dfbb31c731986c8abfcd99633
3
  size 651839518
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d7c869d90bc65ff9162e260a3ad90b52db74336697ff0e09ff7df98ff31e84
3
  size 651839518
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:421f99707ecd0d99d7fb40197f028ceb02c2ffdc30698df74ad0b16e6900f2cf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23cb5c1762800647eb12638185cb6ac240d9d76953826c534f216d71fcd80242
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d1650f5062195d8ee65b24ab00a137ab48cccbff41f41ba060d4208547a763c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8,
5
  "eval_steps": 500,
6
- "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1357,13 +1357,163 @@
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.4529,
1359
  "step": 225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 2,
1365
  "save_steps": 25,
1366
- "total_flos": 2.907887080911667e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.4529,
1359
  "step": 225
1360
+ },
1361
+ {
1362
+ "epoch": 1.81,
1363
+ "learning_rate": 0.0002,
1364
+ "loss": 0.4786,
1365
+ "step": 226
1366
+ },
1367
+ {
1368
+ "epoch": 1.82,
1369
+ "learning_rate": 0.0002,
1370
+ "loss": 0.4656,
1371
+ "step": 227
1372
+ },
1373
+ {
1374
+ "epoch": 1.82,
1375
+ "learning_rate": 0.0002,
1376
+ "loss": 0.4418,
1377
+ "step": 228
1378
+ },
1379
+ {
1380
+ "epoch": 1.83,
1381
+ "learning_rate": 0.0002,
1382
+ "loss": 0.4452,
1383
+ "step": 229
1384
+ },
1385
+ {
1386
+ "epoch": 1.84,
1387
+ "learning_rate": 0.0002,
1388
+ "loss": 0.4603,
1389
+ "step": 230
1390
+ },
1391
+ {
1392
+ "epoch": 1.85,
1393
+ "learning_rate": 0.0002,
1394
+ "loss": 0.4569,
1395
+ "step": 231
1396
+ },
1397
+ {
1398
+ "epoch": 1.86,
1399
+ "learning_rate": 0.0002,
1400
+ "loss": 0.4226,
1401
+ "step": 232
1402
+ },
1403
+ {
1404
+ "epoch": 1.86,
1405
+ "learning_rate": 0.0002,
1406
+ "loss": 0.402,
1407
+ "step": 233
1408
+ },
1409
+ {
1410
+ "epoch": 1.87,
1411
+ "learning_rate": 0.0002,
1412
+ "loss": 0.4336,
1413
+ "step": 234
1414
+ },
1415
+ {
1416
+ "epoch": 1.88,
1417
+ "learning_rate": 0.0002,
1418
+ "loss": 0.4489,
1419
+ "step": 235
1420
+ },
1421
+ {
1422
+ "epoch": 1.89,
1423
+ "learning_rate": 0.0002,
1424
+ "loss": 0.4135,
1425
+ "step": 236
1426
+ },
1427
+ {
1428
+ "epoch": 1.9,
1429
+ "learning_rate": 0.0002,
1430
+ "loss": 0.411,
1431
+ "step": 237
1432
+ },
1433
+ {
1434
+ "epoch": 1.9,
1435
+ "learning_rate": 0.0002,
1436
+ "loss": 0.3589,
1437
+ "step": 238
1438
+ },
1439
+ {
1440
+ "epoch": 1.91,
1441
+ "learning_rate": 0.0002,
1442
+ "loss": 0.3683,
1443
+ "step": 239
1444
+ },
1445
+ {
1446
+ "epoch": 1.92,
1447
+ "learning_rate": 0.0002,
1448
+ "loss": 0.3443,
1449
+ "step": 240
1450
+ },
1451
+ {
1452
+ "epoch": 1.93,
1453
+ "learning_rate": 0.0002,
1454
+ "loss": 0.3332,
1455
+ "step": 241
1456
+ },
1457
+ {
1458
+ "epoch": 1.94,
1459
+ "learning_rate": 0.0002,
1460
+ "loss": 0.3668,
1461
+ "step": 242
1462
+ },
1463
+ {
1464
+ "epoch": 1.94,
1465
+ "learning_rate": 0.0002,
1466
+ "loss": 0.3168,
1467
+ "step": 243
1468
+ },
1469
+ {
1470
+ "epoch": 1.95,
1471
+ "learning_rate": 0.0002,
1472
+ "loss": 0.3434,
1473
+ "step": 244
1474
+ },
1475
+ {
1476
+ "epoch": 1.96,
1477
+ "learning_rate": 0.0002,
1478
+ "loss": 0.303,
1479
+ "step": 245
1480
+ },
1481
+ {
1482
+ "epoch": 1.97,
1483
+ "learning_rate": 0.0002,
1484
+ "loss": 0.2891,
1485
+ "step": 246
1486
+ },
1487
+ {
1488
+ "epoch": 1.98,
1489
+ "learning_rate": 0.0002,
1490
+ "loss": 0.3218,
1491
+ "step": 247
1492
+ },
1493
+ {
1494
+ "epoch": 1.98,
1495
+ "learning_rate": 0.0002,
1496
+ "loss": 0.3089,
1497
+ "step": 248
1498
+ },
1499
+ {
1500
+ "epoch": 1.99,
1501
+ "learning_rate": 0.0002,
1502
+ "loss": 0.3189,
1503
+ "step": 249
1504
+ },
1505
+ {
1506
+ "epoch": 2.0,
1507
+ "learning_rate": 0.0002,
1508
+ "loss": 0.4472,
1509
+ "step": 250
1510
  }
1511
  ],
1512
  "logging_steps": 1,
1513
  "max_steps": 250,
1514
  "num_train_epochs": 2,
1515
  "save_steps": 25,
1516
+ "total_flos": 3.210168125010739e+16,
1517
  "trial_name": null,
1518
  "trial_params": null
1519
  }