Hans-Christian Bøge Pedersen committed on
Commit
2d7d019
•
1 Parent(s): 37c1337

Training in progress, step 13800

Browse files
{checkpoint-13200 → checkpoint-13800}/config.json RENAMED
File without changes
{checkpoint-13200 → checkpoint-13800}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef36ca8c8c70233abc7a8cee6a5d1a0555740766ca04707bae03e3f0c580c5f7
3
  size 330501178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914978b2a4788b287da0c998e5aacef46d279ce11afef35e802cfb5b777a0f5f
3
  size 330501178
{checkpoint-13200 → checkpoint-13800}/preprocessor_config.json RENAMED
File without changes
{checkpoint-13200 → checkpoint-13800}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a603da01c74d9504c35845be6ee4365c2df4b2bd1c14a68b036545f8b9bfe0cf
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3843b02ee5ae11c37ff2887d62bb97a97af3189ee614e15e6c71c023fa806df5
3
  size 166628834
{checkpoint-13200 → checkpoint-13800}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:baeb722c7eb9df4195b8f19894482c730fd38333b5ab545d6c278f67dba1e795
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d593abced29e6f34e3600625cf827fd4591fcf821bb4f9bff7add8ae91e99077
3
  size 14244
{checkpoint-13200 → checkpoint-13800}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a14aac3f1a42a5643173abb85e00e68e78f46b63a124973b1fb9b5013022b7a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ee1413e1cf9c272323ce5f4dece3c6a0e346bfd0db69d70c75a91f9fbae55f4
3
  size 988
{checkpoint-13200 → checkpoint-13800}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811bdd1945539ce0d69d353e2dedff42b36763d7e95bd7d48ace3a36632e96c8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1b01715c4d4978b8bd9921ee2051456f9f8e18431463aed7053a7c4d3d0170
3
  size 1064
{checkpoint-13200 → checkpoint-13800}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.508075242257268,
5
- "global_step": 13200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1590,11 +1590,83 @@
1590
  "learning_rate": 7.494584837545127e-06,
1591
  "loss": 0.9041,
1592
  "step": 13200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1593
  }
1594
  ],
1595
  "max_steps": 52630,
1596
  "num_train_epochs": 10,
1597
- "total_flos": 5.046728076278784e+19,
1598
  "trial_name": null,
1599
  "trial_params": null
1600
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.6220786623598706,
5
+ "global_step": 13800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1590
  "learning_rate": 7.494584837545127e-06,
1591
  "loss": 0.9041,
1592
  "step": 13200
1593
+ },
1594
+ {
1595
+ "epoch": 2.52,
1596
+ "learning_rate": 7.485274558236748e-06,
1597
+ "loss": 0.9399,
1598
+ "step": 13250
1599
+ },
1600
+ {
1601
+ "epoch": 2.53,
1602
+ "learning_rate": 7.475774273228198e-06,
1603
+ "loss": 0.9176,
1604
+ "step": 13300
1605
+ },
1606
+ {
1607
+ "epoch": 2.54,
1608
+ "learning_rate": 7.466273988219647e-06,
1609
+ "loss": 0.9235,
1610
+ "step": 13350
1611
+ },
1612
+ {
1613
+ "epoch": 2.55,
1614
+ "learning_rate": 7.456773703211096e-06,
1615
+ "loss": 0.8947,
1616
+ "step": 13400
1617
+ },
1618
+ {
1619
+ "epoch": 2.56,
1620
+ "learning_rate": 7.4472734182025465e-06,
1621
+ "loss": 0.9205,
1622
+ "step": 13450
1623
+ },
1624
+ {
1625
+ "epoch": 2.57,
1626
+ "learning_rate": 7.437773133193996e-06,
1627
+ "loss": 0.9188,
1628
+ "step": 13500
1629
+ },
1630
+ {
1631
+ "epoch": 2.57,
1632
+ "learning_rate": 7.428272848185446e-06,
1633
+ "loss": 0.9285,
1634
+ "step": 13550
1635
+ },
1636
+ {
1637
+ "epoch": 2.58,
1638
+ "learning_rate": 7.418772563176896e-06,
1639
+ "loss": 0.93,
1640
+ "step": 13600
1641
+ },
1642
+ {
1643
+ "epoch": 2.59,
1644
+ "learning_rate": 7.409272278168346e-06,
1645
+ "loss": 0.9355,
1646
+ "step": 13650
1647
+ },
1648
+ {
1649
+ "epoch": 2.6,
1650
+ "learning_rate": 7.3997719931597955e-06,
1651
+ "loss": 0.9297,
1652
+ "step": 13700
1653
+ },
1654
+ {
1655
+ "epoch": 2.61,
1656
+ "learning_rate": 7.390271708151245e-06,
1657
+ "loss": 0.9037,
1658
+ "step": 13750
1659
+ },
1660
+ {
1661
+ "epoch": 2.62,
1662
+ "learning_rate": 7.380771423142695e-06,
1663
+ "loss": 0.9066,
1664
+ "step": 13800
1665
  }
1666
  ],
1667
  "max_steps": 52630,
1668
  "num_train_epochs": 10,
1669
+ "total_flos": 5.276133496617984e+19,
1670
  "trial_name": null,
1671
  "trial_params": null
1672
  }
{checkpoint-13200 → checkpoint-13800}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4989b2d7f4cae57b5a3aa1cf86e415ff5b1965edd161d195369354348da688
3
  size 166628834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3843b02ee5ae11c37ff2887d62bb97a97af3189ee614e15e6c71c023fa806df5
3
  size 166628834
runs/Feb29_13-56-06_3a0664c7c2dd/events.out.tfevents.1709215109.3a0664c7c2dd.2321.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1d5debafc9304f7db453dcd62a7183edc61c47e61bd1ddedd70eef86531d738
3
- size 9537
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5420ea52b1a60c7d79aba070b7a3c2d00dbef6cff73689c11e602a6ead721e49
3
+ size 10165