Hans-Christian Bøge Pedersen committed on
Commit
b97ad63
β€’
1 Parent(s): 2515262

Training in progress, step 12200

Browse files
{checkpoint-11600 → checkpoint-12200}/config.json RENAMED
File without changes
{checkpoint-11600 → checkpoint-12200}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba9ebd2cb340a71369a53abe1497f25128c8aabf9d1330dc4084498a66707958
3
  size 330501178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc515a01e9e2331272e2688ab26d150060ee5b4d18b46a5039892b1fe7a60bc9
3
  size 330501178
{checkpoint-11600 → checkpoint-12200}/preprocessor_config.json RENAMED
File without changes
{checkpoint-11600 → checkpoint-12200}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6be8a1e7c46e218316192ae1521c8cc3fea19e48bea7a4413093de0babd368f
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3d6ecb7d0de9b755ff2254f0b23a59bdebac0f0720a7a7f6bf6137dda88820
3
  size 166628834
{checkpoint-11600 → checkpoint-12200}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bbe03c165858e427bf1690c78aca8d29770dca019e5d87ae12c45ed31926031
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4035ceaade5bbf92f1d33853f1f79f9f758c53575371225b56f7833007fbb057
3
+ size 14308
{checkpoint-11600 → checkpoint-12200}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9eef046ab22a76d8c7502f6eed706ed994dc76c1182ca7dc0096a858466626b
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de62983e80550d154137c9abb3ad8c5a0b825753ca3b5128cd86472366c6609
3
  size 988
{checkpoint-11600 → checkpoint-12200}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eae3e6dc44dcb169f3179fa7f6c9297649ce7525dbcd15721ef74297edd0ca3d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:442fd94f442ef422e03ea26344ef677e888ff84597c1ac6626f845997b27ddfb
3
  size 1064
{checkpoint-11600 → checkpoint-12200}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.2040661219836597,
5
- "global_step": 11600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1398,11 +1398,83 @@
1398
  "learning_rate": 7.798593957818735e-06,
1399
  "loss": 0.9419,
1400
  "step": 11600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1401
  }
1402
  ],
1403
  "max_steps": 52630,
1404
  "num_train_epochs": 10,
1405
- "total_flos": 4.434980288707584e+19,
1406
  "trial_name": null,
1407
  "trial_params": null
1408
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3180695420862625,
5
+ "global_step": 12200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1398
  "learning_rate": 7.798593957818735e-06,
1399
  "loss": 0.9419,
1400
  "step": 11600
1401
+ },
1402
+ {
1403
+ "epoch": 2.21,
1404
+ "learning_rate": 7.789093672810185e-06,
1405
+ "loss": 0.9078,
1406
+ "step": 11650
1407
+ },
1408
+ {
1409
+ "epoch": 2.22,
1410
+ "learning_rate": 7.779593387801634e-06,
1411
+ "loss": 0.9313,
1412
+ "step": 11700
1413
+ },
1414
+ {
1415
+ "epoch": 2.23,
1416
+ "learning_rate": 7.770093102793086e-06,
1417
+ "loss": 0.9452,
1418
+ "step": 11750
1419
+ },
1420
+ {
1421
+ "epoch": 2.24,
1422
+ "learning_rate": 7.760592817784535e-06,
1423
+ "loss": 0.9401,
1424
+ "step": 11800
1425
+ },
1426
+ {
1427
+ "epoch": 2.25,
1428
+ "learning_rate": 7.751092532775983e-06,
1429
+ "loss": 0.9278,
1430
+ "step": 11850
1431
+ },
1432
+ {
1433
+ "epoch": 2.26,
1434
+ "learning_rate": 7.741592247767434e-06,
1435
+ "loss": 0.931,
1436
+ "step": 11900
1437
+ },
1438
+ {
1439
+ "epoch": 2.27,
1440
+ "learning_rate": 7.732091962758883e-06,
1441
+ "loss": 0.9436,
1442
+ "step": 11950
1443
+ },
1444
+ {
1445
+ "epoch": 2.28,
1446
+ "learning_rate": 7.722591677750333e-06,
1447
+ "loss": 0.9212,
1448
+ "step": 12000
1449
+ },
1450
+ {
1451
+ "epoch": 2.29,
1452
+ "learning_rate": 7.713091392741782e-06,
1453
+ "loss": 0.95,
1454
+ "step": 12050
1455
+ },
1456
+ {
1457
+ "epoch": 2.3,
1458
+ "learning_rate": 7.703591107733232e-06,
1459
+ "loss": 0.9701,
1460
+ "step": 12100
1461
+ },
1462
+ {
1463
+ "epoch": 2.31,
1464
+ "learning_rate": 7.694090822724683e-06,
1465
+ "loss": 0.9402,
1466
+ "step": 12150
1467
+ },
1468
+ {
1469
+ "epoch": 2.32,
1470
+ "learning_rate": 7.684590537716132e-06,
1471
+ "loss": 0.9419,
1472
+ "step": 12200
1473
  }
1474
  ],
1475
  "max_steps": 52630,
1476
  "num_train_epochs": 10,
1477
+ "total_flos": 4.664385709046784e+19,
1478
  "trial_name": null,
1479
  "trial_params": null
1480
  }
{checkpoint-11600 → checkpoint-12200}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce7ead05fff3caa3b7b28d20216da8399f0f8d3c59ad03f41369e9196756b69b
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3d6ecb7d0de9b755ff2254f0b23a59bdebac0f0720a7a7f6bf6137dda88820
3
  size 166628834
runs/Feb29_06-40-12_88ba3e3089b0/events.out.tfevents.1709188854.88ba3e3089b0.584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3b34a889cd4bdc6538d73ca2cc4228cd65651810682322161e9a1861284cfe5
3
- size 42815
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa5e17d1caff09ce99c86d4fe4fc1855344e39766bd7effb3b06b7eec178394
3
+ size 43443