stefania-radu commited on
Commit
4bf20bf
1 Parent(s): 4a8a79d

Training in progress, step 240000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a06ef5c009b6a3d34b61b8e9435e48bd8704c1092e28f5d3a8a0669a2a565c45
3
  size 893441530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11144856c02c8e972a36fb5b42d4223035071c4a917ea79c06d582d2b9be6085
3
  size 893441530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95f9300ed2f42668d1280e1852bf99100220c183e1a5aa40fe963881c2f1f15c
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ad81878847ff45e1880bc9e193537b6a982e87fb64a13bbf06eba84c2c3ee6f
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da60bc481c5e8dde4e51a48ffefd72b80167a866448e54613b2b81fb34be2af3
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20be28e08eeb080e0d87de2c1d006e31a433ae3d529b46a69dfe770492fd995c
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa1786a2f6edec81fd73665bbfadc34243a09e187302f15861e7ba28d522bf54
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c781a803fdb01dba917962146262a56eb080213a823923503263cf460fa3ac80
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.23,
5
- "global_step": 230000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1708,11 +1708,85 @@
1708
  "eval_samples_per_second": 106.178,
1709
  "eval_steps_per_second": 13.272,
1710
  "step": 230000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1711
  }
1712
  ],
1713
  "max_steps": 1000000,
1714
  "num_train_epochs": 9223372036854775807,
1715
- "total_flos": 2.036560339843154e+21,
1716
  "trial_name": null,
1717
  "trial_params": null
1718
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.24,
5
+ "global_step": 240000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1708
  "eval_samples_per_second": 106.178,
1709
  "eval_steps_per_second": 13.272,
1710
  "step": 230000
1711
+ },
1712
+ {
1713
+ "epoch": 0.23,
1714
+ "learning_rate": 1.798940649585048e-05,
1715
+ "loss": 0.3703,
1716
+ "step": 231000
1717
+ },
1718
+ {
1719
+ "epoch": 0.23,
1720
+ "learning_rate": 1.7981234828858012e-05,
1721
+ "loss": 0.3696,
1722
+ "step": 232000
1723
+ },
1724
+ {
1725
+ "epoch": 0.23,
1726
+ "learning_rate": 1.7973023724659226e-05,
1727
+ "loss": 0.3696,
1728
+ "step": 233000
1729
+ },
1730
+ {
1731
+ "epoch": 0.23,
1732
+ "learning_rate": 1.7964773273049443e-05,
1733
+ "loss": 0.3696,
1734
+ "step": 234000
1735
+ },
1736
+ {
1737
+ "epoch": 0.23,
1738
+ "learning_rate": 1.795648356425428e-05,
1739
+ "loss": 0.3689,
1740
+ "step": 235000
1741
+ },
1742
+ {
1743
+ "epoch": 0.23,
1744
+ "eval_runtime": 3392.6969,
1745
+ "eval_samples_per_second": 100.113,
1746
+ "eval_steps_per_second": 12.514,
1747
+ "step": 235000
1748
+ },
1749
+ {
1750
+ "epoch": 0.24,
1751
+ "learning_rate": 1.7948154688928657e-05,
1752
+ "loss": 0.3691,
1753
+ "step": 236000
1754
+ },
1755
+ {
1756
+ "epoch": 0.24,
1757
+ "learning_rate": 1.793978673815583e-05,
1758
+ "loss": 0.368,
1759
+ "step": 237000
1760
+ },
1761
+ {
1762
+ "epoch": 0.24,
1763
+ "learning_rate": 1.7931379803446365e-05,
1764
+ "loss": 0.3662,
1765
+ "step": 238000
1766
+ },
1767
+ {
1768
+ "epoch": 0.24,
1769
+ "learning_rate": 1.792293397673715e-05,
1770
+ "loss": 0.3634,
1771
+ "step": 239000
1772
+ },
1773
+ {
1774
+ "epoch": 0.24,
1775
+ "learning_rate": 1.791444935039039e-05,
1776
+ "loss": 0.365,
1777
+ "step": 240000
1778
+ },
1779
+ {
1780
+ "epoch": 0.24,
1781
+ "eval_runtime": 3401.6269,
1782
+ "eval_samples_per_second": 99.85,
1783
+ "eval_steps_per_second": 12.481,
1784
+ "step": 240000
1785
  }
1786
  ],
1787
  "max_steps": 1000000,
1788
  "num_train_epochs": 9223372036854775807,
1789
+ "total_flos": 2.125106441575465e+21,
1790
  "trial_name": null,
1791
  "trial_params": null
1792
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95f9300ed2f42668d1280e1852bf99100220c183e1a5aa40fe963881c2f1f15c
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ad81878847ff45e1880bc9e193537b6a982e87fb64a13bbf06eba84c2c3ee6f
3
  size 454197066