Federic commited on
Commit
eff28af
1 Parent(s): 5fb47fd

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:909164143e946a79544695ae4729ed406738f0ec6408ba0da707119f7581fbf9
3
  size 838904832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54c4947058fd8e7ee421b0b46fc349022d4a1f3f8246ef6c3d3b6bda09b72f50
3
  size 838904832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b25d5b7c08a71ac14f6ba0f530f590caddf977889f0c4b04c9812dd1f4fa5fa4
3
  size 420633876
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a651b2e845e714d6bbbfe26616bd79e994bd844b56c8ce9664fd66902226483
3
  size 420633876
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c19e042a9bfc28144451bbf43f82dc0a8daf6a85a49c45d8ec4eb8089fe161b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1f20496eb68bf92a3c0bedec6630dbaf08516bee744709362b85a6b5810eb0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f59a103009f3230e51c40288ef6a33247523fa398934878b1e22a81660cbade8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d1650f5062195d8ee65b24ab00a137ab48cccbff41f41ba060d4208547a763c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9,
5
  "eval_steps": 500,
6
- "global_step": 225,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1357,13 +1357,163 @@
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.4565,
1359
  "step": 225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1360
  }
1361
  ],
1362
  "logging_steps": 1,
1363
  "max_steps": 250,
1364
  "num_train_epochs": 1,
1365
  "save_steps": 25,
1366
- "total_flos": 2.72384291254272e+16,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1357
  "learning_rate": 0.0002,
1358
  "loss": 0.4565,
1359
  "step": 225
1360
+ },
1361
+ {
1362
+ "epoch": 0.9,
1363
+ "learning_rate": 0.0002,
1364
+ "loss": 0.4283,
1365
+ "step": 226
1366
+ },
1367
+ {
1368
+ "epoch": 0.91,
1369
+ "learning_rate": 0.0002,
1370
+ "loss": 0.4522,
1371
+ "step": 227
1372
+ },
1373
+ {
1374
+ "epoch": 0.91,
1375
+ "learning_rate": 0.0002,
1376
+ "loss": 0.4712,
1377
+ "step": 228
1378
+ },
1379
+ {
1380
+ "epoch": 0.92,
1381
+ "learning_rate": 0.0002,
1382
+ "loss": 0.5018,
1383
+ "step": 229
1384
+ },
1385
+ {
1386
+ "epoch": 0.92,
1387
+ "learning_rate": 0.0002,
1388
+ "loss": 0.4691,
1389
+ "step": 230
1390
+ },
1391
+ {
1392
+ "epoch": 0.92,
1393
+ "learning_rate": 0.0002,
1394
+ "loss": 0.4537,
1395
+ "step": 231
1396
+ },
1397
+ {
1398
+ "epoch": 0.93,
1399
+ "learning_rate": 0.0002,
1400
+ "loss": 0.4482,
1401
+ "step": 232
1402
+ },
1403
+ {
1404
+ "epoch": 0.93,
1405
+ "learning_rate": 0.0002,
1406
+ "loss": 0.4369,
1407
+ "step": 233
1408
+ },
1409
+ {
1410
+ "epoch": 0.94,
1411
+ "learning_rate": 0.0002,
1412
+ "loss": 0.4772,
1413
+ "step": 234
1414
+ },
1415
+ {
1416
+ "epoch": 0.94,
1417
+ "learning_rate": 0.0002,
1418
+ "loss": 0.4546,
1419
+ "step": 235
1420
+ },
1421
+ {
1422
+ "epoch": 0.94,
1423
+ "learning_rate": 0.0002,
1424
+ "loss": 0.4321,
1425
+ "step": 236
1426
+ },
1427
+ {
1428
+ "epoch": 0.95,
1429
+ "learning_rate": 0.0002,
1430
+ "loss": 0.3777,
1431
+ "step": 237
1432
+ },
1433
+ {
1434
+ "epoch": 0.95,
1435
+ "learning_rate": 0.0002,
1436
+ "loss": 0.4099,
1437
+ "step": 238
1438
+ },
1439
+ {
1440
+ "epoch": 0.96,
1441
+ "learning_rate": 0.0002,
1442
+ "loss": 0.3694,
1443
+ "step": 239
1444
+ },
1445
+ {
1446
+ "epoch": 0.96,
1447
+ "learning_rate": 0.0002,
1448
+ "loss": 0.3902,
1449
+ "step": 240
1450
+ },
1451
+ {
1452
+ "epoch": 0.96,
1453
+ "learning_rate": 0.0002,
1454
+ "loss": 0.3989,
1455
+ "step": 241
1456
+ },
1457
+ {
1458
+ "epoch": 0.97,
1459
+ "learning_rate": 0.0002,
1460
+ "loss": 0.4031,
1461
+ "step": 242
1462
+ },
1463
+ {
1464
+ "epoch": 0.97,
1465
+ "learning_rate": 0.0002,
1466
+ "loss": 0.4104,
1467
+ "step": 243
1468
+ },
1469
+ {
1470
+ "epoch": 0.98,
1471
+ "learning_rate": 0.0002,
1472
+ "loss": 0.3533,
1473
+ "step": 244
1474
+ },
1475
+ {
1476
+ "epoch": 0.98,
1477
+ "learning_rate": 0.0002,
1478
+ "loss": 0.364,
1479
+ "step": 245
1480
+ },
1481
+ {
1482
+ "epoch": 0.98,
1483
+ "learning_rate": 0.0002,
1484
+ "loss": 0.3696,
1485
+ "step": 246
1486
+ },
1487
+ {
1488
+ "epoch": 0.99,
1489
+ "learning_rate": 0.0002,
1490
+ "loss": 0.3252,
1491
+ "step": 247
1492
+ },
1493
+ {
1494
+ "epoch": 0.99,
1495
+ "learning_rate": 0.0002,
1496
+ "loss": 0.3523,
1497
+ "step": 248
1498
+ },
1499
+ {
1500
+ "epoch": 1.0,
1501
+ "learning_rate": 0.0002,
1502
+ "loss": 0.3395,
1503
+ "step": 249
1504
+ },
1505
+ {
1506
+ "epoch": 1.0,
1507
+ "learning_rate": 0.0002,
1508
+ "loss": 0.3503,
1509
+ "step": 250
1510
  }
1511
  ],
1512
  "logging_steps": 1,
1513
  "max_steps": 250,
1514
  "num_train_epochs": 1,
1515
  "save_steps": 25,
1516
+ "total_flos": 2.990177006051328e+16,
1517
  "trial_name": null,
1518
  "trial_params": null
1519
  }