diaenra committed
Commit 03b750c · verified · 1 Parent(s): 57fc82f

Training in progress, step 283, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28702cfc13f12e38ed7e8748902970e8f15746494ae60bf0c05bb32ce4cfb306
+oid sha256:eb4636f56b84a29bc498149f8fa29ab96cd149578b787eaec38b88594a715d2c
 size 1140880624
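
Each weight file in this commit is stored with Git LFS, so the diff only touches the three-line pointer file: the oid (the sha256 digest of the blob) changes, while the size stays at 1140880624 bytes. A minimal sketch for checking a downloaded copy against the new pointer, assuming a local file named adapter_model.safetensors (the path is hypothetical):

    import hashlib

    def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
        """Recompute the sha256 digest that a Git LFS pointer stores as its oid."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            # Hash in chunks so multi-gigabyte checkpoint files fit in memory.
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # For an intact download this prints the oid from the '+' line above.
    print(lfs_oid("adapter_model.safetensors"))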
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:042a1bb5adc484f5d364789a71e9be5c00f2196d7fb29dac5dfeacd44c11616c
+oid sha256:f440c1bf2cce0bb3890a1db85b5dfa6f57d9db3073b20a1ea822a1fd304bdb03
 size 2281891834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:451887e4835bc2148eb7c8fb62a54fce7c4115e0b23975de9fe5ccb06c8afce4
+oid sha256:64af67540b37ea7895c7f0895b6fd3530a619682b8fb6206680dab892bb34ea0
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:372e6fe050f71f63f824569eaebe72289a3e5447184748fac9f5e02dd918695e
+oid sha256:4512787257fe5eeb9de23bd70dafa8e125bcb4548e42b7e1a007385ee165b1c3
 size 1064
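
Taken together, these four files are the usual contents of a transformers Trainer checkpoint: adapter weights, optimizer state, RNG state, and LR-scheduler state. Note that optimizer.pt (2281891834 bytes) is almost exactly twice the size of adapter_model.safetensors (1140880624 bytes), which is consistent with an Adam-style optimizer keeping two moment buffers per trainable parameter, plus a little metadata. A quick way to inspect the smaller state files, assuming PyTorch and a local clone (paths hypothetical):

    import torch

    # weights_only=False performs a full pickle load; use it only on files you trust.
    sched = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
    rng = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

    print(sched)        # scheduler state dict (typically last_epoch, _last_lr, ...)
    print(rng.keys())   # per-library RNG states (python, numpy, cpu, cuda, ...)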
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8445229681978799,
+  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 239,
+  "global_step": 283,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1680,6 +1680,314 @@
       "learning_rate": 1.3598609942150765e-05,
       "loss": 2.6368,
       "step": 239
+    },
+    {
+      "epoch": 0.8480565371024735,
+      "grad_norm": 4.957668781280518,
+      "learning_rate": 1.3015556956751669e-05,
+      "loss": 2.4514,
+      "step": 240
+    },
+    {
+      "epoch": 0.8515901060070671,
+      "grad_norm": 5.872159004211426,
+      "learning_rate": 1.2443403456474017e-05,
+      "loss": 2.6689,
+      "step": 241
+    },
+    {
+      "epoch": 0.8551236749116607,
+      "grad_norm": 5.210415363311768,
+      "learning_rate": 1.1882318057580489e-05,
+      "loss": 2.5006,
+      "step": 242
+    },
+    {
+      "epoch": 0.8586572438162544,
+      "grad_norm": 4.810140132904053,
+      "learning_rate": 1.1332466114513512e-05,
+      "loss": 2.193,
+      "step": 243
+    },
+    {
+      "epoch": 0.8621908127208481,
+      "grad_norm": 5.142510414123535,
+      "learning_rate": 1.0794009671164484e-05,
+      "loss": 2.3454,
+      "step": 244
+    },
+    {
+      "epoch": 0.8657243816254417,
+      "grad_norm": 5.115345478057861,
+      "learning_rate": 1.0267107413118742e-05,
+      "loss": 2.5682,
+      "step": 245
+    },
+    {
+      "epoch": 0.8692579505300353,
+      "grad_norm": 5.960536479949951,
+      "learning_rate": 9.751914620890206e-06,
+      "loss": 2.5434,
+      "step": 246
+    },
+    {
+      "epoch": 0.872791519434629,
+      "grad_norm": 4.932643890380859,
+      "learning_rate": 9.248583124159438e-06,
+      "loss": 2.4929,
+      "step": 247
+    },
+    {
+      "epoch": 0.8763250883392226,
+      "grad_norm": 5.753042221069336,
+      "learning_rate": 8.757261257028777e-06,
+      "loss": 2.5304,
+      "step": 248
+    },
+    {
+      "epoch": 0.8798586572438163,
+      "grad_norm": 6.017014026641846,
+      "learning_rate": 8.278093814307637e-06,
+      "loss": 2.5224,
+      "step": 249
+    },
+    {
+      "epoch": 0.8833922261484098,
+      "grad_norm": 6.0025434494018555,
+      "learning_rate": 7.81122200884072e-06,
+      "loss": 2.7302,
+      "step": 250
+    },
+    {
+      "epoch": 0.8869257950530035,
+      "grad_norm": 5.745126247406006,
+      "learning_rate": 7.356783429892023e-06,
+      "loss": 2.328,
+      "step": 251
+    },
+    {
+      "epoch": 0.8904593639575972,
+      "grad_norm": 6.124932289123535,
+      "learning_rate": 6.9149120025965905e-06,
+      "loss": 2.3842,
+      "step": 252
+    },
+    {
+      "epoch": 0.8939929328621908,
+      "grad_norm": 5.819494724273682,
+      "learning_rate": 6.4857379484922375e-06,
+      "loss": 2.5136,
+      "step": 253
+    },
+    {
+      "epoch": 0.8975265017667845,
+      "grad_norm": 4.685893535614014,
+      "learning_rate": 6.069387747142591e-06,
+      "loss": 2.2869,
+      "step": 254
+    },
+    {
+      "epoch": 0.901060070671378,
+      "grad_norm": 7.096498966217041,
+      "learning_rate": 5.665984098862992e-06,
+      "loss": 2.3135,
+      "step": 255
+    },
+    {
+      "epoch": 0.9045936395759717,
+      "grad_norm": 4.7295732498168945,
+      "learning_rate": 5.275645888560232e-06,
+      "loss": 2.1201,
+      "step": 256
+    },
+    {
+      "epoch": 0.9081272084805654,
+      "grad_norm": 4.792962551116943,
+      "learning_rate": 4.898488150696467e-06,
+      "loss": 2.4277,
+      "step": 257
+    },
+    {
+      "epoch": 0.911660777385159,
+      "grad_norm": 5.9998345375061035,
+      "learning_rate": 4.534622035388214e-06,
+      "loss": 2.589,
+      "step": 258
+    },
+    {
+      "epoch": 0.9151943462897526,
+      "grad_norm": 5.028961181640625,
+      "learning_rate": 4.184154775649768e-06,
+      "loss": 2.3586,
+      "step": 259
+    },
+    {
+      "epoch": 0.9187279151943463,
+      "grad_norm": 4.820951461791992,
+      "learning_rate": 3.8471896557912e-06,
+      "loss": 2.2727,
+      "step": 260
+    },
+    {
+      "epoch": 0.9222614840989399,
+      "grad_norm": 5.404629707336426,
+      "learning_rate": 3.523825980979989e-06,
+      "loss": 2.3514,
+      "step": 261
+    },
+    {
+      "epoch": 0.9257950530035336,
+      "grad_norm": 4.80362606048584,
+      "learning_rate": 3.2141590479753236e-06,
+      "loss": 2.2188,
+      "step": 262
+    },
+    {
+      "epoch": 0.9293286219081273,
+      "grad_norm": 5.06864070892334,
+      "learning_rate": 2.918280117043709e-06,
+      "loss": 2.1602,
+      "step": 263
+    },
+    {
+      "epoch": 0.9328621908127208,
+      "grad_norm": 5.121250629425049,
+      "learning_rate": 2.636276385064157e-06,
+      "loss": 2.4903,
+      "step": 264
+    },
+    {
+      "epoch": 0.9363957597173145,
+      "grad_norm": 5.582918167114258,
+      "learning_rate": 2.3682309598308747e-06,
+      "loss": 2.379,
+      "step": 265
+    },
+    {
+      "epoch": 0.9399293286219081,
+      "grad_norm": 5.544982433319092,
+      "learning_rate": 2.114222835560986e-06,
+      "loss": 2.8578,
+      "step": 266
+    },
+    {
+      "epoch": 0.9434628975265018,
+      "grad_norm": 4.647549629211426,
+      "learning_rate": 1.8743268696145954e-06,
+      "loss": 2.2086,
+      "step": 267
+    },
+    {
+      "epoch": 0.9469964664310954,
+      "grad_norm": 4.952742576599121,
+      "learning_rate": 1.6486137604339813e-06,
+      "loss": 2.3761,
+      "step": 268
+    },
+    {
+      "epoch": 0.950530035335689,
+      "grad_norm": 5.95998477935791,
+      "learning_rate": 1.4371500267084338e-06,
+      "loss": 2.5603,
+      "step": 269
+    },
+    {
+      "epoch": 0.9540636042402827,
+      "grad_norm": 5.027519226074219,
+      "learning_rate": 1.2399979877708745e-06,
+      "loss": 2.3833,
+      "step": 270
+    },
+    {
+      "epoch": 0.9575971731448764,
+      "grad_norm": 5.592859745025635,
+      "learning_rate": 1.0572157452321097e-06,
+      "loss": 2.7075,
+      "step": 271
+    },
+    {
+      "epoch": 0.9611307420494699,
+      "grad_norm": 4.911261081695557,
+      "learning_rate": 8.888571658579703e-07,
+      "loss": 2.2938,
+      "step": 272
+    },
+    {
+      "epoch": 0.9646643109540636,
+      "grad_norm": 5.232736587524414,
+      "learning_rate": 7.349718656945504e-07,
+      "loss": 2.1393,
+      "step": 273
+    },
+    {
+      "epoch": 0.9681978798586572,
+      "grad_norm": 5.2621331214904785,
+      "learning_rate": 5.956051954461472e-07,
+      "loss": 2.7253,
+      "step": 274
+    },
+    {
+      "epoch": 0.9717314487632509,
+      "grad_norm": 4.635681629180908,
+      "learning_rate": 4.7079822711015296e-07,
+      "loss": 2.2413,
+      "step": 275
+    },
+    {
+      "epoch": 0.9752650176678446,
+      "grad_norm": 5.374542236328125,
+      "learning_rate": 3.605877418729975e-07,
+      "loss": 2.6089,
+      "step": 276
+    },
+    {
+      "epoch": 0.9787985865724381,
+      "grad_norm": 4.751713275909424,
+      "learning_rate": 2.6500621927054715e-07,
+      "loss": 2.3142,
+      "step": 277
+    },
+    {
+      "epoch": 0.9823321554770318,
+      "grad_norm": 5.122497081756592,
+      "learning_rate": 1.840818276162226e-07,
+      "loss": 2.3476,
+      "step": 278
+    },
+    {
+      "epoch": 0.9858657243816255,
+      "grad_norm": 6.561892509460449,
+      "learning_rate": 1.1783841569968367e-07,
+      "loss": 2.6703,
+      "step": 279
+    },
+    {
+      "epoch": 0.9893992932862191,
+      "grad_norm": 4.851873874664307,
+      "learning_rate": 6.629550575847354e-08,
+      "loss": 2.3439,
+      "step": 280
+    },
+    {
+      "epoch": 0.9929328621908127,
+      "grad_norm": 5.513582229614258,
+      "learning_rate": 2.946828772473764e-08,
+      "loss": 2.6915,
+      "step": 281
+    },
+    {
+      "epoch": 0.9964664310954063,
+      "grad_norm": 5.608765602111816,
+      "learning_rate": 7.36761474865455e-09,
+      "loss": 2.5523,
+      "step": 282
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 5.803535461425781,
+      "learning_rate": 0.0,
+      "loss": 2.6483,
+      "step": 283
     }
   ],
   "logging_steps": 1,
@@ -1694,12 +2002,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.4782353377329152e+16,
+  "total_flos": 1.7503790819180544e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null