Nekofox commited on
Commit
b0dc783
1 Parent(s): f8f9bd0

Training in progress, step 136000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8aa69fa081bb9b7470ad363847ef75f3a283f3463ddba236aafae461a2fd3b9
3
  size 3871544599
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f48d508ece7df760eb555c017887dbe66d0cba6c7cfec5e6d05c94a3f8988e
3
  size 3871544599
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ec088332debea15871857309043358ce7a97e1e0ca384609c2a131a5b45b6ec
3
  size 1944201353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2a9024dd8a689e9ae503a3b92563cbd3856dd8ddb0b64a278d1035564872316
3
  size 1944201353
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1efb79d23eba5169f878156012a33e95ccf5864793451e2ed51a78eaf9b34135
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586b3dca1089776e32d357296b4040f36f7c66e484c78b70291b9bf859f0da7d
3
  size 14575
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab7e6a0965f9191616cbd8a7051ebb5ae9b3ed2cba001391ed6b68095ae5e49
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be7d27f5adca25da92af93035570ac65d76bd8fd5301a951f17e1a456e8d376
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a77689f98dc0a26b40937e0eae71a1cc87cd7724106c708110a77e3cf0feee9a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19bd67e3926298d10573f580a7616d969cad0d6d8378e7cf425b0e24b2dc768a
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9327830855333823,
5
- "global_step": 132000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1590,11 +1590,59 @@
1590
  "learning_rate": 4.893511776430863e-06,
1591
  "loss": 1.6672,
1592
  "step": 132000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1593
  }
1594
  ],
1595
  "max_steps": 1415120,
1596
  "num_train_epochs": 10,
1597
- "total_flos": 5.87863269950423e+16,
1598
  "trial_name": null,
1599
  "trial_params": null
1600
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9610492396404545,
5
+ "global_step": 136000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1590
  "learning_rate": 4.893511776430863e-06,
1591
  "loss": 1.6672,
1592
  "step": 132000
1593
+ },
1594
+ {
1595
+ "epoch": 0.94,
1596
+ "learning_rate": 4.892709015746761e-06,
1597
+ "loss": 1.6937,
1598
+ "step": 132500
1599
+ },
1600
+ {
1601
+ "epoch": 0.94,
1602
+ "learning_rate": 4.89190492131604e-06,
1603
+ "loss": 1.676,
1604
+ "step": 133000
1605
+ },
1606
+ {
1607
+ "epoch": 0.94,
1608
+ "learning_rate": 4.891096271306403e-06,
1609
+ "loss": 1.6808,
1610
+ "step": 133500
1611
+ },
1612
+ {
1613
+ "epoch": 0.95,
1614
+ "learning_rate": 4.890284675178114e-06,
1615
+ "loss": 1.6601,
1616
+ "step": 134000
1617
+ },
1618
+ {
1619
+ "epoch": 0.95,
1620
+ "learning_rate": 4.8894701339311555e-06,
1621
+ "loss": 1.6762,
1622
+ "step": 134500
1623
+ },
1624
+ {
1625
+ "epoch": 0.95,
1626
+ "learning_rate": 4.888654286477422e-06,
1627
+ "loss": 1.6826,
1628
+ "step": 135000
1629
+ },
1630
+ {
1631
+ "epoch": 0.96,
1632
+ "learning_rate": 4.8878338638927996e-06,
1633
+ "loss": 1.649,
1634
+ "step": 135500
1635
+ },
1636
+ {
1637
+ "epoch": 0.96,
1638
+ "learning_rate": 4.887010499209203e-06,
1639
+ "loss": 1.677,
1640
+ "step": 136000
1641
  }
1642
  ],
1643
  "max_steps": 1415120,
1644
  "num_train_epochs": 10,
1645
+ "total_flos": 6.00884944380887e+16,
1646
  "trial_name": null,
1647
  "trial_params": null
1648
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ec088332debea15871857309043358ce7a97e1e0ca384609c2a131a5b45b6ec
3
  size 1944201353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2a9024dd8a689e9ae503a3b92563cbd3856dd8ddb0b64a278d1035564872316
3
  size 1944201353
runs/May16_05-26-38_740b6dbf3e56/events.out.tfevents.1684216250.740b6dbf3e56.4449.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60445f76728e4248bb41052178874475009af41b1d2b315a5a381d9378d96232
3
- size 13423
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a86a89599e9513699a13d646354a3c7e3196e861192b3d15d0bfb827290eb6
3
+ size 14703