linshoufan committed on
Commit
994d58f
1 Parent(s): 4653e71

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d5bd30929a6dd034a66c5d64682b9b3cdc6f1915335f3bc1d67c8a2e7afa275
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78caa2c38a325571429f5192c54b55e7acd2942178a3b3a29e1355f64718dfc3
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0673f09c0825fa04b428f3263c1ec340a98a30b0050d9b657d3760f11d8c4051
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34207402f7ac29c10b51b15618427b3becb7efb4d9c4faeb05f3eba0719776d5
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9eff99030c3d6f5750daa7c2be6d4f88cb3d11423c32100f3c68edcfd52a81b3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9215fd4e50baa3d875dcc8889839ab8d7f5341bf2c225bcc0e39d3faf0a13e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8beec2a3ed2d46be0542aeb907aa0e1e4613601adbf391ce67dcb87b78a7321a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2feb74370175d96a6640e7f04476c37d2c41e3013d5d40dcad7ab974a5f7bc5f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 30.15735431390852,
3
- "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-5500",
4
- "epoch": 1.7679202828672453,
5
  "eval_steps": 500,
6
- "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1646,6 +1646,155 @@
1646
  "eval_samples_per_second": 2.371,
1647
  "eval_steps_per_second": 0.297,
1648
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1649
  }
1650
  ],
1651
  "logging_steps": 25,
@@ -1653,7 +1802,7 @@
1653
  "num_input_tokens_seen": 0,
1654
  "num_train_epochs": 2,
1655
  "save_steps": 500,
1656
- "total_flos": 2.539551522816e+19,
1657
  "train_batch_size": 16,
1658
  "trial_name": null,
1659
  "trial_params": null
 
1
  {
2
+ "best_metric": 29.697292972396323,
3
+ "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-6000",
4
+ "epoch": 1.9286403085824495,
5
  "eval_steps": 500,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1646
  "eval_samples_per_second": 2.371,
1647
  "eval_steps_per_second": 0.297,
1648
  "step": 5500
1649
+ },
1650
+ {
1651
+ "epoch": 1.78,
1652
+ "grad_norm": 7.236656188964844,
1653
+ "learning_rate": 1.138516824567135e-06,
1654
+ "loss": 0.1295,
1655
+ "step": 5525
1656
+ },
1657
+ {
1658
+ "epoch": 1.78,
1659
+ "grad_norm": 6.239099502563477,
1660
+ "learning_rate": 1.0976804965697485e-06,
1661
+ "loss": 0.1378,
1662
+ "step": 5550
1663
+ },
1664
+ {
1665
+ "epoch": 1.79,
1666
+ "grad_norm": 4.9148945808410645,
1667
+ "learning_rate": 1.056844168572362e-06,
1668
+ "loss": 0.1272,
1669
+ "step": 5575
1670
+ },
1671
+ {
1672
+ "epoch": 1.8,
1673
+ "grad_norm": 7.572327136993408,
1674
+ "learning_rate": 1.0160078405749757e-06,
1675
+ "loss": 0.1405,
1676
+ "step": 5600
1677
+ },
1678
+ {
1679
+ "epoch": 1.81,
1680
+ "grad_norm": 6.76165771484375,
1681
+ "learning_rate": 9.751715125775892e-07,
1682
+ "loss": 0.1351,
1683
+ "step": 5625
1684
+ },
1685
+ {
1686
+ "epoch": 1.82,
1687
+ "grad_norm": 10.984220504760742,
1688
+ "learning_rate": 9.343351845802026e-07,
1689
+ "loss": 0.1467,
1690
+ "step": 5650
1691
+ },
1692
+ {
1693
+ "epoch": 1.82,
1694
+ "grad_norm": 4.543166637420654,
1695
+ "learning_rate": 8.934988565828162e-07,
1696
+ "loss": 0.1175,
1697
+ "step": 5675
1698
+ },
1699
+ {
1700
+ "epoch": 1.83,
1701
+ "grad_norm": 8.191649436950684,
1702
+ "learning_rate": 8.526625285854297e-07,
1703
+ "loss": 0.1388,
1704
+ "step": 5700
1705
+ },
1706
+ {
1707
+ "epoch": 1.84,
1708
+ "grad_norm": 7.215826988220215,
1709
+ "learning_rate": 8.118262005880432e-07,
1710
+ "loss": 0.1354,
1711
+ "step": 5725
1712
+ },
1713
+ {
1714
+ "epoch": 1.85,
1715
+ "grad_norm": 5.940629959106445,
1716
+ "learning_rate": 7.709898725906567e-07,
1717
+ "loss": 0.1283,
1718
+ "step": 5750
1719
+ },
1720
+ {
1721
+ "epoch": 1.86,
1722
+ "grad_norm": 6.796767234802246,
1723
+ "learning_rate": 7.301535445932702e-07,
1724
+ "loss": 0.1274,
1725
+ "step": 5775
1726
+ },
1727
+ {
1728
+ "epoch": 1.86,
1729
+ "grad_norm": 8.403697967529297,
1730
+ "learning_rate": 6.893172165958838e-07,
1731
+ "loss": 0.1333,
1732
+ "step": 5800
1733
+ },
1734
+ {
1735
+ "epoch": 1.87,
1736
+ "grad_norm": 6.127229690551758,
1737
+ "learning_rate": 6.484808885984972e-07,
1738
+ "loss": 0.1406,
1739
+ "step": 5825
1740
+ },
1741
+ {
1742
+ "epoch": 1.88,
1743
+ "grad_norm": 7.16465950012207,
1744
+ "learning_rate": 6.076445606011108e-07,
1745
+ "loss": 0.132,
1746
+ "step": 5850
1747
+ },
1748
+ {
1749
+ "epoch": 1.89,
1750
+ "grad_norm": 5.777968406677246,
1751
+ "learning_rate": 5.668082326037243e-07,
1752
+ "loss": 0.1437,
1753
+ "step": 5875
1754
+ },
1755
+ {
1756
+ "epoch": 1.9,
1757
+ "grad_norm": 6.021764755249023,
1758
+ "learning_rate": 5.259719046063379e-07,
1759
+ "loss": 0.1203,
1760
+ "step": 5900
1761
+ },
1762
+ {
1763
+ "epoch": 1.9,
1764
+ "grad_norm": 5.480493068695068,
1765
+ "learning_rate": 4.851355766089514e-07,
1766
+ "loss": 0.1398,
1767
+ "step": 5925
1768
+ },
1769
+ {
1770
+ "epoch": 1.91,
1771
+ "grad_norm": 7.609493732452393,
1772
+ "learning_rate": 4.442992486115649e-07,
1773
+ "loss": 0.1274,
1774
+ "step": 5950
1775
+ },
1776
+ {
1777
+ "epoch": 1.92,
1778
+ "grad_norm": 5.910650730133057,
1779
+ "learning_rate": 4.034629206141784e-07,
1780
+ "loss": 0.1352,
1781
+ "step": 5975
1782
+ },
1783
+ {
1784
+ "epoch": 1.93,
1785
+ "grad_norm": 4.371640682220459,
1786
+ "learning_rate": 3.626265926167919e-07,
1787
+ "loss": 0.1265,
1788
+ "step": 6000
1789
+ },
1790
+ {
1791
+ "epoch": 1.93,
1792
+ "eval_cer": 29.697292972396323,
1793
+ "eval_loss": 0.37359777092933655,
1794
+ "eval_runtime": 1867.3275,
1795
+ "eval_samples_per_second": 2.376,
1796
+ "eval_steps_per_second": 0.297,
1797
+ "step": 6000
1798
  }
1799
  ],
1800
  "logging_steps": 25,
 
1802
  "num_input_tokens_seen": 0,
1803
  "num_train_epochs": 2,
1804
  "save_steps": 500,
1805
+ "total_flos": 2.770419843072e+19,
1806
  "train_batch_size": 16,
1807
  "trial_name": null,
1808
  "trial_params": null