Hans-Christian Bøge Pedersen committed on
Commit
2515262
‒
1 Parent(s): 06fbe53

Training in progress, step 12000

Browse files
{checkpoint-11400 → checkpoint-12000}/config.json RENAMED
File without changes
{checkpoint-11400 → checkpoint-12000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51059b9f784869f50dd486eb75789ff1100313039cd9915ccdc24d02f4a54362
3
  size 330501178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e394c8c5d4f935ff0aee7a185e6aa2e5c3eafb4ec593dfda34f42f03cf70a38
3
  size 330501178
{checkpoint-11400 → checkpoint-12000}/preprocessor_config.json RENAMED
File without changes
{checkpoint-11400 → checkpoint-12000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7548a6ca2f5ae4a311faa32c37a23c748dfe15d36185ec0c99e9131ccf462a42
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7ead05fff3caa3b7b28d20216da8399f0f8d3c59ad03f41369e9196756b69b
3
  size 166628834
{checkpoint-11400 → checkpoint-12000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6d15c9ab1a39bc9cbeba851393001f0b83f94fa83870124e670ed5ce08dfb59
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42099ca80a2827f586e2cd0357b4bc186e31647613412ebe9b9139558b0086db
3
+ size 14244
{checkpoint-11400 → checkpoint-12000}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bac664126ce3c5e13e744c139ee130c846e9a0d1d9dc5ca209b2be886e743fce
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:901ccc6b629ca31f80d8fcc7094c82ebb007876337324efbc24cfb3fffb68ca6
3
  size 988
{checkpoint-11400 → checkpoint-12000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a7082b2db3c89cb9935a0d0e0d465ce412b06cd4a642b4dd7a786feb67e5376
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efe7bb4cfa76efc67c49531c06369642b1628cfa905abd33637961592c6799ba
3
  size 1064
{checkpoint-11400 → checkpoint-12000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.1660649819494586,
5
- "global_step": 11400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1374,11 +1374,83 @@
1374
  "learning_rate": 7.836595097852936e-06,
1375
  "loss": 0.9605,
1376
  "step": 11400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1377
  }
1378
  ],
1379
  "max_steps": 52630,
1380
  "num_train_epochs": 10,
1381
- "total_flos": 4.358511815261184e+19,
1382
  "trial_name": null,
1383
  "trial_params": null
1384
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.2800684020520614,
5
+ "global_step": 12000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1374
  "learning_rate": 7.836595097852936e-06,
1375
  "loss": 0.9605,
1376
  "step": 11400
1377
+ },
1378
+ {
1379
+ "epoch": 2.18,
1380
+ "learning_rate": 7.827094812844385e-06,
1381
+ "loss": 0.9283,
1382
+ "step": 11450
1383
+ },
1384
+ {
1385
+ "epoch": 2.19,
1386
+ "learning_rate": 7.817594527835835e-06,
1387
+ "loss": 0.9115,
1388
+ "step": 11500
1389
+ },
1390
+ {
1391
+ "epoch": 2.19,
1392
+ "learning_rate": 7.808094242827286e-06,
1393
+ "loss": 0.9508,
1394
+ "step": 11550
1395
+ },
1396
+ {
1397
+ "epoch": 2.2,
1398
+ "learning_rate": 7.798593957818735e-06,
1399
+ "loss": 0.9419,
1400
+ "step": 11600
1401
+ },
1402
+ {
1403
+ "epoch": 2.21,
1404
+ "learning_rate": 7.789093672810185e-06,
1405
+ "loss": 0.9078,
1406
+ "step": 11650
1407
+ },
1408
+ {
1409
+ "epoch": 2.22,
1410
+ "learning_rate": 7.779593387801634e-06,
1411
+ "loss": 0.9313,
1412
+ "step": 11700
1413
+ },
1414
+ {
1415
+ "epoch": 2.23,
1416
+ "learning_rate": 7.770093102793086e-06,
1417
+ "loss": 0.9452,
1418
+ "step": 11750
1419
+ },
1420
+ {
1421
+ "epoch": 2.24,
1422
+ "learning_rate": 7.760592817784535e-06,
1423
+ "loss": 0.9401,
1424
+ "step": 11800
1425
+ },
1426
+ {
1427
+ "epoch": 2.25,
1428
+ "learning_rate": 7.751092532775983e-06,
1429
+ "loss": 0.9278,
1430
+ "step": 11850
1431
+ },
1432
+ {
1433
+ "epoch": 2.26,
1434
+ "learning_rate": 7.741592247767434e-06,
1435
+ "loss": 0.931,
1436
+ "step": 11900
1437
+ },
1438
+ {
1439
+ "epoch": 2.27,
1440
+ "learning_rate": 7.732091962758883e-06,
1441
+ "loss": 0.9436,
1442
+ "step": 11950
1443
+ },
1444
+ {
1445
+ "epoch": 2.28,
1446
+ "learning_rate": 7.722591677750333e-06,
1447
+ "loss": 0.9212,
1448
+ "step": 12000
1449
  }
1450
  ],
1451
  "max_steps": 52630,
1452
  "num_train_epochs": 10,
1453
+ "total_flos": 4.587917235600384e+19,
1454
  "trial_name": null,
1455
  "trial_params": null
1456
  }
{checkpoint-11400 → checkpoint-12000}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:941f644e8d17ec96d3c53c66b204f2d746989d17f3afd50aef4a79442ed41cb8
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7ead05fff3caa3b7b28d20216da8399f0f8d3c59ad03f41369e9196756b69b
3
  size 166628834
runs/Feb29_06-40-12_88ba3e3089b0/events.out.tfevents.1709188854.88ba3e3089b0.584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba88144b93f482dc63e502f528b19a12f86253af64663b386b0894fbeb13610b
3
- size 42187
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b34a889cd4bdc6538d73ca2cc4228cd65651810682322161e9a1861284cfe5
3
+ size 42815