Hans-Christian Bøge Pedersen committed on
Commit
87edf3e
•
1 Parent(s): 48c9174

Training in progress, step 14200

Browse files
{checkpoint-13600 → checkpoint-14200}/config.json RENAMED
File without changes
{checkpoint-13600 → checkpoint-14200}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ef09549a78125d1de375d2095ccf6196b177421fc89c97d84799588f3fa5a7c
3
  size 330501178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4629854b6d2f7516c3614b86c6b8a3b31bb89e0aa451a5f6e54287f289fdc3c6
3
  size 330501178
{checkpoint-13600 → checkpoint-14200}/preprocessor_config.json RENAMED
File without changes
{checkpoint-13600 → checkpoint-14200}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4989b2d7f4cae57b5a3aa1cf86e415ff5b1965edd161d195369354348da688
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0095f434db700d3588f4af5ce6bb23a3a7fc70b5d35e5253b019ce1cb8c172ce
3
  size 166628834
{checkpoint-13600 → checkpoint-14200}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:537b572ce49c7fb1c6fd03aafb446241aa726fc8f5b60652b0e98a5dbb1d2e11
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db45b1808e1a2b82d1f76cb07176001727ce19eec258b0d0aab00d7ffa409eb0
3
+ size 14244
{checkpoint-13600 → checkpoint-14200}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0966922245d3c6dcafc3e3da4ae01f193f0ff913d9252f11da8684e4ef0bac2e
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb93e2da8d9da1aec4f1882aaa56ed786d86c462721ce504a8471fd13b7c4e2e
3
  size 988
{checkpoint-13600 → checkpoint-14200}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d47ce7fd4eab69b12315aaacde513de620180503352b93c9d2b97de65db0e8a4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:276b37392024b06e44ae6c01cf2efbfa37883112a2b8c070d4f93f17b342f4fb
3
  size 1064
{checkpoint-13600 → checkpoint-14200}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5840775223256696,
5
- "global_step": 13600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1638,11 +1638,83 @@
1638
  "learning_rate": 7.418772563176896e-06,
1639
  "loss": 0.93,
1640
  "step": 13600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1641
  }
1642
  ],
1643
  "max_steps": 52630,
1644
  "num_train_epochs": 10,
1645
- "total_flos": 5.199665023171584e+19,
1646
  "trial_name": null,
1647
  "trial_params": null
1648
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.698080942428273,
5
+ "global_step": 14200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1638
  "learning_rate": 7.418772563176896e-06,
1639
  "loss": 0.93,
1640
  "step": 13600
1641
+ },
1642
+ {
1643
+ "epoch": 2.59,
1644
+ "learning_rate": 7.409272278168346e-06,
1645
+ "loss": 0.9355,
1646
+ "step": 13650
1647
+ },
1648
+ {
1649
+ "epoch": 2.6,
1650
+ "learning_rate": 7.3997719931597955e-06,
1651
+ "loss": 0.9297,
1652
+ "step": 13700
1653
+ },
1654
+ {
1655
+ "epoch": 2.61,
1656
+ "learning_rate": 7.390271708151245e-06,
1657
+ "loss": 0.9037,
1658
+ "step": 13750
1659
+ },
1660
+ {
1661
+ "epoch": 2.62,
1662
+ "learning_rate": 7.380771423142695e-06,
1663
+ "loss": 0.9066,
1664
+ "step": 13800
1665
+ },
1666
+ {
1667
+ "epoch": 2.63,
1668
+ "learning_rate": 7.371271138134145e-06,
1669
+ "loss": 0.9081,
1670
+ "step": 13850
1671
+ },
1672
+ {
1673
+ "epoch": 2.64,
1674
+ "learning_rate": 7.361770853125595e-06,
1675
+ "loss": 0.9332,
1676
+ "step": 13900
1677
+ },
1678
+ {
1679
+ "epoch": 2.65,
1680
+ "learning_rate": 7.352270568117044e-06,
1681
+ "loss": 0.9515,
1682
+ "step": 13950
1683
+ },
1684
+ {
1685
+ "epoch": 2.66,
1686
+ "learning_rate": 7.342770283108493e-06,
1687
+ "loss": 0.9192,
1688
+ "step": 14000
1689
+ },
1690
+ {
1691
+ "epoch": 2.67,
1692
+ "learning_rate": 7.3332699980999435e-06,
1693
+ "loss": 0.9532,
1694
+ "step": 14050
1695
+ },
1696
+ {
1697
+ "epoch": 2.68,
1698
+ "learning_rate": 7.323769713091393e-06,
1699
+ "loss": 0.9195,
1700
+ "step": 14100
1701
+ },
1702
+ {
1703
+ "epoch": 2.69,
1704
+ "learning_rate": 7.314269428082843e-06,
1705
+ "loss": 0.935,
1706
+ "step": 14150
1707
+ },
1708
+ {
1709
+ "epoch": 2.7,
1710
+ "learning_rate": 7.304769143074293e-06,
1711
+ "loss": 0.9003,
1712
+ "step": 14200
1713
  }
1714
  ],
1715
  "max_steps": 52630,
1716
  "num_train_epochs": 10,
1717
+ "total_flos": 5.429070443510784e+19,
1718
  "trial_name": null,
1719
  "trial_params": null
1720
  }
{checkpoint-13600 → checkpoint-14200}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:670470fed4544d93929c2372f87c97e34165b8b048a83ee2aa089a1adc4dddb1
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0095f434db700d3588f4af5ce6bb23a3a7fc70b5d35e5253b019ce1cb8c172ce
3
  size 166628834
runs/Feb29_13-56-06_3a0664c7c2dd/events.out.tfevents.1709215109.3a0664c7c2dd.2321.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bd93f380aeab7a5fef6c099d0b1c1cd2174c9bce2c7a7abef500335123eab98
3
- size 10793
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f44289628044379305f2e5b513edc923c821844c15c4113788714dfb4d9dec87
3
+ size 11421