warmestman committed
Commit 359c7aa
1 Parent(s): f6d4cbe

Training in progress, step 8000, checkpoint

last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cdd883a5460a3d6fef3ba86dc48a5c7818f0b59f546569efce6e0c6c05644fb
+oid sha256:da65298fcbe65ec90ace7a34200b691786bbbf741cb0bcefde53bae793c0c150
 size 4993448880
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c3361154434454a2c82e489b524924f18144e43f09f9bc443707201e953d6cb
+oid sha256:05b4684e020b85bbfca68cd478e00f24ed336b20da011cf724b3610bbdaf0ebb
 size 1180663192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8225f7d2b350449309cf6cb95eacece99044be12a3a7aa483bdf0783e22ec40f
+oid sha256:bab032ab8a6d161826bec34f3047be0921deb634160832f7b92503b7128c5ea4
 size 3095446256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da289394bd41e2cdd34e1fa9b7ba6ae9221802035398fe0e37a4ddc28b5db9f6
+oid sha256:a2de0b7a38bec3fb98aa32f323336a214d83264d9703f7d612b7245ed18581e0
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:380aebeeba070e703dcd469862cff7b4aace5b115dadb0d7385847d018530611
+oid sha256:0c52f6b6405132e6ee658c09ea95ff4a1e46ef9dee4259ea08581b6ffffd433f
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 41.91738628238271,
 "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-6000",
-"epoch": 41.91616766467066,
+"epoch": 47.90419161676647,
 "eval_steps": 1000,
-"global_step": 7000,
+"global_step": 8000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -1750,6 +1750,255 @@
 "eval_steps_per_second": 0.089,
 "eval_wer": 42.35190583576261,
 "step": 7000
+},
+{
+"epoch": 42.07,
+"learning_rate": 6.654871794871795e-07,
+"loss": 0.0011,
+"step": 7025
+},
+{
+"epoch": 42.22,
+"learning_rate": 6.642051282051282e-07,
+"loss": 0.0009,
+"step": 7050
+},
+{
+"epoch": 42.37,
+"learning_rate": 6.629230769230769e-07,
+"loss": 0.001,
+"step": 7075
+},
+{
+"epoch": 42.51,
+"learning_rate": 6.616410256410256e-07,
+"loss": 0.0011,
+"step": 7100
+},
+{
+"epoch": 42.66,
+"learning_rate": 6.603589743589744e-07,
+"loss": 0.0014,
+"step": 7125
+},
+{
+"epoch": 42.81,
+"learning_rate": 6.590769230769231e-07,
+"loss": 0.001,
+"step": 7150
+},
+{
+"epoch": 42.96,
+"learning_rate": 6.577948717948718e-07,
+"loss": 0.0012,
+"step": 7175
+},
+{
+"epoch": 43.11,
+"learning_rate": 6.565128205128205e-07,
+"loss": 0.0011,
+"step": 7200
+},
+{
+"epoch": 43.26,
+"learning_rate": 6.552307692307693e-07,
+"loss": 0.001,
+"step": 7225
+},
+{
+"epoch": 43.41,
+"learning_rate": 6.539487179487179e-07,
+"loss": 0.0015,
+"step": 7250
+},
+{
+"epoch": 43.56,
+"learning_rate": 6.526666666666666e-07,
+"loss": 0.0016,
+"step": 7275
+},
+{
+"epoch": 43.71,
+"learning_rate": 6.513846153846153e-07,
+"loss": 0.0017,
+"step": 7300
+},
+{
+"epoch": 43.86,
+"learning_rate": 6.501025641025641e-07,
+"loss": 0.0014,
+"step": 7325
+},
+{
+"epoch": 44.01,
+"learning_rate": 6.488205128205128e-07,
+"loss": 0.0012,
+"step": 7350
+},
+{
+"epoch": 44.16,
+"learning_rate": 6.475384615384615e-07,
+"loss": 0.0014,
+"step": 7375
+},
+{
+"epoch": 44.31,
+"learning_rate": 6.462564102564102e-07,
+"loss": 0.001,
+"step": 7400
+},
+{
+"epoch": 44.46,
+"learning_rate": 6.44974358974359e-07,
+"loss": 0.0011,
+"step": 7425
+},
+{
+"epoch": 44.61,
+"learning_rate": 6.436923076923077e-07,
+"loss": 0.0012,
+"step": 7450
+},
+{
+"epoch": 44.76,
+"learning_rate": 6.424102564102564e-07,
+"loss": 0.0011,
+"step": 7475
+},
+{
+"epoch": 44.91,
+"learning_rate": 6.411282051282051e-07,
+"loss": 0.0012,
+"step": 7500
+},
+{
+"epoch": 45.06,
+"learning_rate": 6.398461538461539e-07,
+"loss": 0.0011,
+"step": 7525
+},
+{
+"epoch": 45.21,
+"learning_rate": 6.385641025641026e-07,
+"loss": 0.0008,
+"step": 7550
+},
+{
+"epoch": 45.36,
+"learning_rate": 6.372820512820513e-07,
+"loss": 0.001,
+"step": 7575
+},
+{
+"epoch": 45.51,
+"learning_rate": 6.36e-07,
+"loss": 0.0011,
+"step": 7600
+},
+{
+"epoch": 45.66,
+"learning_rate": 6.347179487179488e-07,
+"loss": 0.0009,
+"step": 7625
+},
+{
+"epoch": 45.81,
+"learning_rate": 6.334358974358974e-07,
+"loss": 0.0011,
+"step": 7650
+},
+{
+"epoch": 45.96,
+"learning_rate": 6.321538461538461e-07,
+"loss": 0.0009,
+"step": 7675
+},
+{
+"epoch": 46.11,
+"learning_rate": 6.308717948717948e-07,
+"loss": 0.0007,
+"step": 7700
+},
+{
+"epoch": 46.26,
+"learning_rate": 6.295897435897435e-07,
+"loss": 0.0006,
+"step": 7725
+},
+{
+"epoch": 46.41,
+"learning_rate": 6.283076923076923e-07,
+"loss": 0.0006,
+"step": 7750
+},
+{
+"epoch": 46.56,
+"learning_rate": 6.27025641025641e-07,
+"loss": 0.0008,
+"step": 7775
+},
+{
+"epoch": 46.71,
+"learning_rate": 6.257435897435897e-07,
+"loss": 0.0007,
+"step": 7800
+},
+{
+"epoch": 46.86,
+"learning_rate": 6.244615384615384e-07,
+"loss": 0.0008,
+"step": 7825
+},
+{
+"epoch": 47.01,
+"learning_rate": 6.231794871794872e-07,
+"loss": 0.0007,
+"step": 7850
+},
+{
+"epoch": 47.16,
+"learning_rate": 6.218974358974358e-07,
+"loss": 0.0005,
+"step": 7875
+},
+{
+"epoch": 47.31,
+"learning_rate": 6.206153846153845e-07,
+"loss": 0.0005,
+"step": 7900
+},
+{
+"epoch": 47.46,
+"learning_rate": 6.193333333333332e-07,
+"loss": 0.0005,
+"step": 7925
+},
+{
+"epoch": 47.6,
+"learning_rate": 6.18051282051282e-07,
+"loss": 0.0004,
+"step": 7950
+},
+{
+"epoch": 47.75,
+"learning_rate": 6.167692307692307e-07,
+"loss": 0.0004,
+"step": 7975
+},
+{
+"epoch": 47.9,
+"learning_rate": 6.154871794871794e-07,
+"loss": 0.0005,
+"step": 8000
+},
+{
+"epoch": 47.9,
+"eval_loss": 0.7143049240112305,
+"eval_runtime": 593.312,
+"eval_samples_per_second": 0.706,
+"eval_steps_per_second": 0.089,
+"eval_wer": 42.22540014300644,
+"step": 8000
 }
 ],
 "logging_steps": 25,
@@ -1757,7 +2006,7 @@
 "num_input_tokens_seen": 0,
 "num_train_epochs": 120,
 "save_steps": 1000,
-"total_flos": 3.801019180430131e+20,
+"total_flos": 4.3440073597845504e+20,
 "train_batch_size": 16,
 "trial_name": null,
 "trial_params": null