stefania-radu committed on
Commit
f0925b8
1 Parent(s): 4bf20bf

Training in progress, step 250000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11144856c02c8e972a36fb5b42d4223035071c4a917ea79c06d582d2b9be6085
3
  size 893441530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce3e998a193326c567e5bf36b4614bcf9a92a873dabc8d985ee1ad116cb61ad2
3
  size 893441530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ad81878847ff45e1880bc9e193537b6a982e87fb64a13bbf06eba84c2c3ee6f
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36acb3092fbd4cafad2bc39be38c35f4299b28e4d00c2f26079a5a4d3580780f
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20be28e08eeb080e0d87de2c1d006e31a433ae3d529b46a69dfe770492fd995c
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58f82edd7a6194c8353e8edba667f026c5f098b23c6a725d52469afb859bd8fc
3
+ size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c781a803fdb01dba917962146262a56eb080213a823923503263cf460fa3ac80
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57954a9914278616526cbeea30401601bc7a6e242a7a52a60b8796e825a3c98
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.24,
5
- "global_step": 240000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1782,11 +1782,85 @@
1782
  "eval_samples_per_second": 99.85,
1783
  "eval_steps_per_second": 12.481,
1784
  "step": 240000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1785
  }
1786
  ],
1787
  "max_steps": 1000000,
1788
  "num_train_epochs": 9223372036854775807,
1789
- "total_flos": 2.125106441575465e+21,
1790
  "trial_name": null,
1791
  "trial_params": null
1792
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.25,
5
+ "global_step": 250000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1782
  "eval_samples_per_second": 99.85,
1783
  "eval_steps_per_second": 12.481,
1784
  "step": 240000
1785
+ },
1786
+ {
1787
+ "epoch": 0.24,
1788
+ "learning_rate": 1.7905926017192613e-05,
1789
+ "loss": 0.3632,
1790
+ "step": 241000
1791
+ },
1792
+ {
1793
+ "epoch": 0.24,
1794
+ "learning_rate": 1.7897364070353612e-05,
1795
+ "loss": 0.3633,
1796
+ "step": 242000
1797
+ },
1798
+ {
1799
+ "epoch": 0.24,
1800
+ "learning_rate": 1.788876360350547e-05,
1801
+ "loss": 0.3628,
1802
+ "step": 243000
1803
+ },
1804
+ {
1805
+ "epoch": 0.24,
1806
+ "learning_rate": 1.7880124710701515e-05,
1807
+ "loss": 0.3628,
1808
+ "step": 244000
1809
+ },
1810
+ {
1811
+ "epoch": 0.24,
1812
+ "learning_rate": 1.7871447486415292e-05,
1813
+ "loss": 0.363,
1814
+ "step": 245000
1815
+ },
1816
+ {
1817
+ "epoch": 0.24,
1818
+ "eval_runtime": 3598.9717,
1819
+ "eval_samples_per_second": 94.375,
1820
+ "eval_steps_per_second": 11.797,
1821
+ "step": 245000
1822
+ },
1823
+ {
1824
+ "epoch": 0.25,
1825
+ "learning_rate": 1.7862732025539543e-05,
1826
+ "loss": 0.364,
1827
+ "step": 246000
1828
+ },
1829
+ {
1830
+ "epoch": 0.25,
1831
+ "learning_rate": 1.7853978423385145e-05,
1832
+ "loss": 0.3626,
1833
+ "step": 247000
1834
+ },
1835
+ {
1836
+ "epoch": 0.25,
1837
+ "learning_rate": 1.784518677568009e-05,
1838
+ "loss": 0.361,
1839
+ "step": 248000
1840
+ },
1841
+ {
1842
+ "epoch": 0.25,
1843
+ "learning_rate": 1.783635717856843e-05,
1844
+ "loss": 0.3597,
1845
+ "step": 249000
1846
+ },
1847
+ {
1848
+ "epoch": 0.25,
1849
+ "learning_rate": 1.782748972860922e-05,
1850
+ "loss": 0.3626,
1851
+ "step": 250000
1852
+ },
1853
+ {
1854
+ "epoch": 0.25,
1855
+ "eval_runtime": 2943.8602,
1856
+ "eval_samples_per_second": 115.376,
1857
+ "eval_steps_per_second": 14.422,
1858
+ "step": 250000
1859
  }
1860
  ],
1861
  "max_steps": 1000000,
1862
  "num_train_epochs": 9223372036854775807,
1863
+ "total_flos": 2.213652543307776e+21,
1864
  "trial_name": null,
1865
  "trial_params": null
1866
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b02dcd885a628a7855e3e0c6a6edcce3320ca6c2778748fda625b892498f6832
3
  size 3768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b59685dedbedfa441a393ef0561d0eeade03f2fa8b31fadfbec3e0393ee18f
3
  size 3768
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ad81878847ff45e1880bc9e193537b6a982e87fb64a13bbf06eba84c2c3ee6f
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36acb3092fbd4cafad2bc39be38c35f4299b28e4d00c2f26079a5a4d3580780f
3
  size 454197066
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b02dcd885a628a7855e3e0c6a6edcce3320ca6c2778748fda625b892498f6832
3
  size 3768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b59685dedbedfa441a393ef0561d0eeade03f2fa8b31fadfbec3e0393ee18f
3
  size 3768