joe611 committed on
Commit
5d1dd71
·
verified ·
1 Parent(s): 6b1ce1b

Training in progress, epoch 149, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65e330108a44107cfd3a060af52a09c3766517975f292eb60bcf37d37d8254d3
3
  size 166496880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02258098985a292d68d20bf94bf89b17ca13110b1ae56ee863f5f0d079f4238d
3
  size 166496880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92085ff0fb81e4a4f2212367a112f07fb546d111416dabb30e3ddb0c918d51b1
3
  size 330495866
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:761e90a5b9c3c01cf812cc7dbf7f8345138f49ad9e203ed4d4779cfad169a777
3
  size 330495866
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795c218f3ddbb3e76a77ad88cdf831b8efe198149593238e9275c9118253ec02
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ff39ce5bc1f6039b31922a6e443ab8d8a8f93d5528fc0d576340ae329fb493
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30c145ebabe049cbd8d4c8ceced4c707816290a109bf20ccefa837d8cb555a00
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb678f76da1c9347406d38fe82346b2ac3acd84e6118cb46f17ee79a3da28612
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.24468238651752472,
3
  "best_model_checkpoint": "chickens-composite-403232323232-150-epochs-wo-transform-metrics-test/checkpoint-95000",
4
- "epoch": 148.0,
5
  "eval_steps": 500,
6
- "global_step": 148000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -38386,6 +38386,263 @@
38386
  "eval_samples_per_second": 15.264,
38387
  "eval_steps_per_second": 1.908,
38388
  "step": 148000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38389
  }
38390
  ],
38391
  "logging_steps": 30,
@@ -38405,7 +38662,7 @@
38405
  "attributes": {}
38406
  }
38407
  },
38408
- "total_flos": 5.09154207105024e+19,
38409
  "train_batch_size": 2,
38410
  "trial_name": null,
38411
  "trial_params": null
 
1
  {
2
  "best_metric": 0.24468238651752472,
3
  "best_model_checkpoint": "chickens-composite-403232323232-150-epochs-wo-transform-metrics-test/checkpoint-95000",
4
+ "epoch": 149.0,
5
  "eval_steps": 500,
6
+ "global_step": 149000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
38386
  "eval_samples_per_second": 15.264,
38387
  "eval_steps_per_second": 1.908,
38388
  "step": 148000
38389
+ },
38390
+ {
38391
+ "epoch": 148.02,
38392
+ "grad_norm": 42.084232330322266,
38393
+ "learning_rate": 4.298583608501328e-09,
38394
+ "loss": 0.1631,
38395
+ "step": 148020
38396
+ },
38397
+ {
38398
+ "epoch": 148.05,
38399
+ "grad_norm": 42.267086029052734,
38400
+ "learning_rate": 4.169328287299545e-09,
38401
+ "loss": 0.1619,
38402
+ "step": 148050
38403
+ },
38404
+ {
38405
+ "epoch": 148.08,
38406
+ "grad_norm": 39.14877700805664,
38407
+ "learning_rate": 4.042045240927883e-09,
38408
+ "loss": 0.1887,
38409
+ "step": 148080
38410
+ },
38411
+ {
38412
+ "epoch": 148.11,
38413
+ "grad_norm": 75.59671020507812,
38414
+ "learning_rate": 3.9167345196361454e-09,
38415
+ "loss": 0.1908,
38416
+ "step": 148110
38417
+ },
38418
+ {
38419
+ "epoch": 148.14,
38420
+ "grad_norm": 87.17718505859375,
38421
+ "learning_rate": 3.793396172895314e-09,
38422
+ "loss": 0.1728,
38423
+ "step": 148140
38424
+ },
38425
+ {
38426
+ "epoch": 148.17,
38427
+ "grad_norm": 37.11481475830078,
38428
+ "learning_rate": 3.672030249396441e-09,
38429
+ "loss": 0.1674,
38430
+ "step": 148170
38431
+ },
38432
+ {
38433
+ "epoch": 148.2,
38434
+ "grad_norm": 34.75517272949219,
38435
+ "learning_rate": 3.5526367970539765e-09,
38436
+ "loss": 0.1768,
38437
+ "step": 148200
38438
+ },
38439
+ {
38440
+ "epoch": 148.23,
38441
+ "grad_norm": 205.51870727539062,
38442
+ "learning_rate": 3.4352158630018837e-09,
38443
+ "loss": 0.1609,
38444
+ "step": 148230
38445
+ },
38446
+ {
38447
+ "epoch": 148.26,
38448
+ "grad_norm": 35.48714065551758,
38449
+ "learning_rate": 3.31976749359586e-09,
38450
+ "loss": 0.1847,
38451
+ "step": 148260
38452
+ },
38453
+ {
38454
+ "epoch": 148.29,
38455
+ "grad_norm": 143.81561279296875,
38456
+ "learning_rate": 3.206291734413891e-09,
38457
+ "loss": 0.1883,
38458
+ "step": 148290
38459
+ },
38460
+ {
38461
+ "epoch": 148.32,
38462
+ "grad_norm": 98.67874908447266,
38463
+ "learning_rate": 3.094788630254031e-09,
38464
+ "loss": 0.1725,
38465
+ "step": 148320
38466
+ },
38467
+ {
38468
+ "epoch": 148.35,
38469
+ "grad_norm": 51.73759078979492,
38470
+ "learning_rate": 2.9852582251355124e-09,
38471
+ "loss": 0.1788,
38472
+ "step": 148350
38473
+ },
38474
+ {
38475
+ "epoch": 148.38,
38476
+ "grad_norm": 37.59303665161133,
38477
+ "learning_rate": 2.8777005622998567e-09,
38478
+ "loss": 0.1682,
38479
+ "step": 148380
38480
+ },
38481
+ {
38482
+ "epoch": 148.41,
38483
+ "grad_norm": 80.69348907470703,
38484
+ "learning_rate": 2.772115684209209e-09,
38485
+ "loss": 0.1846,
38486
+ "step": 148410
38487
+ },
38488
+ {
38489
+ "epoch": 148.44,
38490
+ "grad_norm": 35.9366455078125,
38491
+ "learning_rate": 2.6685036325457826e-09,
38492
+ "loss": 0.1715,
38493
+ "step": 148440
38494
+ },
38495
+ {
38496
+ "epoch": 148.47,
38497
+ "grad_norm": 1834.0330810546875,
38498
+ "learning_rate": 2.5668644482151892e-09,
38499
+ "loss": 0.1731,
38500
+ "step": 148470
38501
+ },
38502
+ {
38503
+ "epoch": 148.5,
38504
+ "grad_norm": 30.97291374206543,
38505
+ "learning_rate": 2.4671981713420003e-09,
38506
+ "loss": 0.1783,
38507
+ "step": 148500
38508
+ },
38509
+ {
38510
+ "epoch": 148.53,
38511
+ "grad_norm": 33.913604736328125,
38512
+ "learning_rate": 2.369504841273629e-09,
38513
+ "loss": 0.1843,
38514
+ "step": 148530
38515
+ },
38516
+ {
38517
+ "epoch": 148.56,
38518
+ "grad_norm": 22.96918487548828,
38519
+ "learning_rate": 2.2737844965775578e-09,
38520
+ "loss": 0.1918,
38521
+ "step": 148560
38522
+ },
38523
+ {
38524
+ "epoch": 148.59,
38525
+ "grad_norm": 35.848934173583984,
38526
+ "learning_rate": 2.1800371750430037e-09,
38527
+ "loss": 0.1711,
38528
+ "step": 148590
38529
+ },
38530
+ {
38531
+ "epoch": 148.62,
38532
+ "grad_norm": 130.026123046875,
38533
+ "learning_rate": 2.088262913679251e-09,
38534
+ "loss": 0.1766,
38535
+ "step": 148620
38536
+ },
38537
+ {
38538
+ "epoch": 148.65,
38539
+ "grad_norm": 84.3224105834961,
38540
+ "learning_rate": 1.9984617487173174e-09,
38541
+ "loss": 0.1673,
38542
+ "step": 148650
38543
+ },
38544
+ {
38545
+ "epoch": 148.68,
38546
+ "grad_norm": 116.34769439697266,
38547
+ "learning_rate": 1.9106337156099553e-09,
38548
+ "loss": 0.1798,
38549
+ "step": 148680
38550
+ },
38551
+ {
38552
+ "epoch": 148.71,
38553
+ "grad_norm": 88.51609802246094,
38554
+ "learning_rate": 1.8247788490299846e-09,
38555
+ "loss": 0.1918,
38556
+ "step": 148710
38557
+ },
38558
+ {
38559
+ "epoch": 148.74,
38560
+ "grad_norm": 43.09914779663086,
38561
+ "learning_rate": 1.740897182871404e-09,
38562
+ "loss": 0.187,
38563
+ "step": 148740
38564
+ },
38565
+ {
38566
+ "epoch": 148.77,
38567
+ "grad_norm": 45.688201904296875,
38568
+ "learning_rate": 1.6589887502493907e-09,
38569
+ "loss": 0.1915,
38570
+ "step": 148770
38571
+ },
38572
+ {
38573
+ "epoch": 148.8,
38574
+ "grad_norm": 26.354772567749023,
38575
+ "learning_rate": 1.5790535835003006e-09,
38576
+ "loss": 0.1609,
38577
+ "step": 148800
38578
+ },
38579
+ {
38580
+ "epoch": 148.83,
38581
+ "grad_norm": 122.46343231201172,
38582
+ "learning_rate": 1.5010917141811132e-09,
38583
+ "loss": 0.1604,
38584
+ "step": 148830
38585
+ },
38586
+ {
38587
+ "epoch": 148.86,
38588
+ "grad_norm": 406.5648193359375,
38589
+ "learning_rate": 1.425103173069986e-09,
38590
+ "loss": 0.2423,
38591
+ "step": 148860
38592
+ },
38593
+ {
38594
+ "epoch": 148.89,
38595
+ "grad_norm": 59.21432876586914,
38596
+ "learning_rate": 1.3510879901657003e-09,
38597
+ "loss": 0.1776,
38598
+ "step": 148890
38599
+ },
38600
+ {
38601
+ "epoch": 148.92,
38602
+ "grad_norm": 53.223148345947266,
38603
+ "learning_rate": 1.2790461946887712e-09,
38604
+ "loss": 0.1913,
38605
+ "step": 148920
38606
+ },
38607
+ {
38608
+ "epoch": 148.95,
38609
+ "grad_norm": 63.24610137939453,
38610
+ "learning_rate": 1.2089778150797816e-09,
38611
+ "loss": 0.1795,
38612
+ "step": 148950
38613
+ },
38614
+ {
38615
+ "epoch": 148.98,
38616
+ "grad_norm": 66.03044128417969,
38617
+ "learning_rate": 1.1408828790010484e-09,
38618
+ "loss": 0.1662,
38619
+ "step": 148980
38620
+ },
38621
+ {
38622
+ "epoch": 149.0,
38623
+ "eval_loss": 0.25540730357170105,
38624
+ "eval_map": 0.8454,
38625
+ "eval_map_50": 0.9638,
38626
+ "eval_map_75": 0.9414,
38627
+ "eval_map_chicken": 0.8417,
38628
+ "eval_map_duck": 0.7981,
38629
+ "eval_map_large": 0.8191,
38630
+ "eval_map_medium": 0.8525,
38631
+ "eval_map_plant": 0.8964,
38632
+ "eval_map_small": 0.3586,
38633
+ "eval_mar_1": 0.3399,
38634
+ "eval_mar_10": 0.8795,
38635
+ "eval_mar_100": 0.8819,
38636
+ "eval_mar_100_chicken": 0.8813,
38637
+ "eval_mar_100_duck": 0.8407,
38638
+ "eval_mar_100_plant": 0.9237,
38639
+ "eval_mar_large": 0.8593,
38640
+ "eval_mar_medium": 0.8889,
38641
+ "eval_mar_small": 0.5029,
38642
+ "eval_runtime": 13.5683,
38643
+ "eval_samples_per_second": 14.74,
38644
+ "eval_steps_per_second": 1.843,
38645
+ "step": 149000
38646
  }
38647
  ],
38648
  "logging_steps": 30,
 
38662
  "attributes": {}
38663
  }
38664
  },
38665
+ "total_flos": 5.12594438234112e+19,
38666
  "train_batch_size": 2,
38667
  "trial_name": null,
38668
  "trial_params": null