nbtpj commited on
Commit
72688c9
1 Parent(s): b3da908

Training in progress, step 30000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f04bebc3423c9754bc5ca418b7467629af05af26e1266c57de200e5bd906de93
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2754a19deafed33f80b8bc9e1d496271fbdefd4dde6d02139929e3ddf01b008f
3
  size 1115513717
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66e154bc9d87e516bef6fa1fe3f5eab0c1a369245f153611b9ee0abd80d50cac
3
- size 15459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e6f2984904c3cd3157bcf9409a340770e020350dad6bed8f8d65bce2381442b
3
+ size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68af8dadd166b77d3ac3501660bb80d8a28f8c54961c3c81b3e418d659352771
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3cf89aef0bc5afe4fc7de6b797864a345f7acd73a830413d35a9d7579e5dbe
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6221317760868281,
5
- "global_step": 27500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1672,11 +1672,169 @@
1672
  "learning_rate": 0.0,
1673
  "loss": 0.6499,
1674
  "step": 27500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1675
  }
1676
  ],
1677
  "max_steps": 30516,
1678
  "num_train_epochs": 2,
1679
- "total_flos": 1.6758234520215552e+17,
1680
  "trial_name": null,
1681
  "trial_params": null
1682
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.7695983011856309,
5
+ "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1672
  "learning_rate": 0.0,
1673
  "loss": 0.6499,
1674
  "step": 27500
1675
+ },
1676
+ {
1677
+ "epoch": 1.63,
1678
+ "learning_rate": 0.0,
1679
+ "loss": 0.6648,
1680
+ "step": 27600
1681
+ },
1682
+ {
1683
+ "epoch": 1.63,
1684
+ "learning_rate": 0.0,
1685
+ "loss": 0.6292,
1686
+ "step": 27700
1687
+ },
1688
+ {
1689
+ "epoch": 1.64,
1690
+ "learning_rate": 0.0,
1691
+ "loss": 0.5542,
1692
+ "step": 27800
1693
+ },
1694
+ {
1695
+ "epoch": 1.65,
1696
+ "learning_rate": 0.0,
1697
+ "loss": 0.5549,
1698
+ "step": 27900
1699
+ },
1700
+ {
1701
+ "epoch": 1.65,
1702
+ "learning_rate": 0.0,
1703
+ "loss": 0.6058,
1704
+ "step": 28000
1705
+ },
1706
+ {
1707
+ "epoch": 1.66,
1708
+ "learning_rate": 0.0,
1709
+ "loss": 0.565,
1710
+ "step": 28100
1711
+ },
1712
+ {
1713
+ "epoch": 1.66,
1714
+ "learning_rate": 0.0,
1715
+ "loss": 0.7065,
1716
+ "step": 28200
1717
+ },
1718
+ {
1719
+ "epoch": 1.67,
1720
+ "learning_rate": 0.0,
1721
+ "loss": 0.6621,
1722
+ "step": 28300
1723
+ },
1724
+ {
1725
+ "epoch": 1.68,
1726
+ "learning_rate": 0.0,
1727
+ "loss": 0.6052,
1728
+ "step": 28400
1729
+ },
1730
+ {
1731
+ "epoch": 1.68,
1732
+ "learning_rate": 0.0,
1733
+ "loss": 0.6506,
1734
+ "step": 28500
1735
+ },
1736
+ {
1737
+ "epoch": 1.69,
1738
+ "learning_rate": 0.0,
1739
+ "loss": 0.7008,
1740
+ "step": 28600
1741
+ },
1742
+ {
1743
+ "epoch": 1.69,
1744
+ "learning_rate": 0.0,
1745
+ "loss": 0.6847,
1746
+ "step": 28700
1747
+ },
1748
+ {
1749
+ "epoch": 1.7,
1750
+ "learning_rate": 0.0,
1751
+ "loss": 0.6591,
1752
+ "step": 28800
1753
+ },
1754
+ {
1755
+ "epoch": 1.7,
1756
+ "learning_rate": 0.0,
1757
+ "loss": 0.6289,
1758
+ "step": 28900
1759
+ },
1760
+ {
1761
+ "epoch": 1.71,
1762
+ "learning_rate": 0.0,
1763
+ "loss": 0.5949,
1764
+ "step": 29000
1765
+ },
1766
+ {
1767
+ "epoch": 1.72,
1768
+ "learning_rate": 0.0,
1769
+ "loss": 0.6497,
1770
+ "step": 29100
1771
+ },
1772
+ {
1773
+ "epoch": 1.72,
1774
+ "learning_rate": 0.0,
1775
+ "loss": 0.5947,
1776
+ "step": 29200
1777
+ },
1778
+ {
1779
+ "epoch": 1.73,
1780
+ "learning_rate": 0.0,
1781
+ "loss": 0.6483,
1782
+ "step": 29300
1783
+ },
1784
+ {
1785
+ "epoch": 1.73,
1786
+ "learning_rate": 0.0,
1787
+ "loss": 0.5743,
1788
+ "step": 29400
1789
+ },
1790
+ {
1791
+ "epoch": 1.74,
1792
+ "learning_rate": 0.0,
1793
+ "loss": 0.6876,
1794
+ "step": 29500
1795
+ },
1796
+ {
1797
+ "epoch": 1.75,
1798
+ "learning_rate": 0.0,
1799
+ "loss": 0.6503,
1800
+ "step": 29600
1801
+ },
1802
+ {
1803
+ "epoch": 1.75,
1804
+ "learning_rate": 0.0,
1805
+ "loss": 0.5963,
1806
+ "step": 29700
1807
+ },
1808
+ {
1809
+ "epoch": 1.76,
1810
+ "learning_rate": 0.0,
1811
+ "loss": 0.6361,
1812
+ "step": 29800
1813
+ },
1814
+ {
1815
+ "epoch": 1.76,
1816
+ "learning_rate": 0.0,
1817
+ "loss": 0.6624,
1818
+ "step": 29900
1819
+ },
1820
+ {
1821
+ "epoch": 1.77,
1822
+ "learning_rate": 0.0,
1823
+ "loss": 0.6298,
1824
+ "step": 30000
1825
+ },
1826
+ {
1827
+ "epoch": 1.77,
1828
+ "eval_loss": 0.8651230335235596,
1829
+ "eval_runtime": 546.7955,
1830
+ "eval_samples_per_second": 29.15,
1831
+ "eval_steps_per_second": 1.823,
1832
+ "step": 30000
1833
  }
1834
  ],
1835
  "max_steps": 30516,
1836
  "num_train_epochs": 2,
1837
+ "total_flos": 1.8284013786415104e+17,
1838
  "trial_name": null,
1839
  "trial_params": null
1840
  }
runs/Dec05_03-30-37_fbdce2302f52/events.out.tfevents.1670211053.fbdce2302f52.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6970173370577d76d998a4804604477fa1af002f43070cb67cef3855a24c8ef6
3
- size 53461
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b673d9ccde84e81880a851f0f5abf58604d05b17b5b045206afb9d74ba26fb
3
+ size 57737