nicolasdupuisroy commited on
Commit
02a0513
1 Parent(s): a9c4ec7

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - imagefolder
@@ -22,7 +24,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.7461538461538462
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +34,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.6263
36
- - Accuracy: 0.7462
37
 
38
  ## Model description
39
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - imagefolder
 
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
+ value: 0.7769230769230769
28
  ---
29
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
36
  It achieves the following results on the evaluation set:
37
+ - Loss: 1.6098
38
+ - Accuracy: 0.7769
39
 
40
  ## Model description
41
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 120.0,
3
- "eval_accuracy": 0.5846153846153846,
4
- "eval_loss": 2.5402629375457764,
5
- "eval_runtime": 2.5184,
6
- "eval_samples_per_second": 51.619,
7
- "eval_steps_per_second": 0.794,
8
- "train_loss": 0.3072653747740246,
9
- "train_runtime": 892.0889,
10
- "train_samples_per_second": 69.948,
11
- "train_steps_per_second": 0.942
12
  }
 
1
  {
2
+ "epoch": 200.0,
3
+ "eval_accuracy": 0.7769230769230769,
4
+ "eval_loss": 1.6097954511642456,
5
+ "eval_runtime": 2.2046,
6
+ "eval_samples_per_second": 58.968,
7
+ "eval_steps_per_second": 0.907,
8
+ "train_loss": 0.39147548845836094,
9
+ "train_runtime": 3403.5317,
10
+ "train_samples_per_second": 30.556,
11
+ "train_steps_per_second": 0.411
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 120.0,
3
- "eval_accuracy": 0.5846153846153846,
4
- "eval_loss": 2.5402629375457764,
5
- "eval_runtime": 2.5184,
6
- "eval_samples_per_second": 51.619,
7
- "eval_steps_per_second": 0.794
8
  }
 
1
  {
2
+ "epoch": 200.0,
3
+ "eval_accuracy": 0.7769230769230769,
4
+ "eval_loss": 1.6097954511642456,
5
+ "eval_runtime": 2.2046,
6
+ "eval_samples_per_second": 58.968,
7
+ "eval_steps_per_second": 0.907
8
  }
runs/Jan17_21-31-37_c6ad14a30b7d/events.out.tfevents.1705530566.c6ad14a30b7d.8359.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03fcbbac7e99b1668601ea3ba58c311f35f655bf205c25b24d0c175be83d9550
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 120.0,
3
- "train_loss": 0.3072653747740246,
4
- "train_runtime": 892.0889,
5
- "train_samples_per_second": 69.948,
6
- "train_steps_per_second": 0.942
7
  }
 
1
  {
2
+ "epoch": 200.0,
3
+ "train_loss": 0.39147548845836094,
4
+ "train_runtime": 3403.5317,
5
+ "train_samples_per_second": 30.556,
6
+ "train_steps_per_second": 0.411
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.5402629375457764,
3
- "best_model_checkpoint": "./drive/MyDrive/repositories/torch_example_image-classification/outputs_letter3/checkpoint-840",
4
- "epoch": 120.0,
5
  "eval_steps": 500,
6
- "global_step": 840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1609,14 +1609,1079 @@
1609
  "train_runtime": 892.0889,
1610
  "train_samples_per_second": 69.948,
1611
  "train_steps_per_second": 0.942
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1612
  }
1613
  ],
1614
  "logging_steps": 10,
1615
- "max_steps": 840,
1616
  "num_input_tokens_seen": 0,
1617
- "num_train_epochs": 120,
1618
  "save_steps": 500,
1619
- "total_flos": 4.837667104869581e+18,
1620
  "train_batch_size": 80,
1621
  "trial_name": null,
1622
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.6097954511642456,
3
+ "best_model_checkpoint": "./outputs_letter3/checkpoint-1386",
4
+ "epoch": 200.0,
5
  "eval_steps": 500,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1609
  "train_runtime": 892.0889,
1610
  "train_samples_per_second": 69.948,
1611
  "train_steps_per_second": 0.942
1612
+ },
1613
+ {
1614
+ "epoch": 121.0,
1615
+ "eval_accuracy": 0.6692307692307692,
1616
+ "eval_loss": 2.35441255569458,
1617
+ "eval_runtime": 2.1277,
1618
+ "eval_samples_per_second": 61.098,
1619
+ "eval_steps_per_second": 0.94,
1620
+ "step": 847
1621
+ },
1622
+ {
1623
+ "epoch": 121.43,
1624
+ "learning_rate": 1.985714285714286e-05,
1625
+ "loss": 1.4504,
1626
+ "step": 850
1627
+ },
1628
+ {
1629
+ "epoch": 122.0,
1630
+ "eval_accuracy": 0.6461538461538462,
1631
+ "eval_loss": 2.3751370906829834,
1632
+ "eval_runtime": 2.1033,
1633
+ "eval_samples_per_second": 61.809,
1634
+ "eval_steps_per_second": 0.951,
1635
+ "step": 854
1636
+ },
1637
+ {
1638
+ "epoch": 122.86,
1639
+ "learning_rate": 1.9714285714285718e-05,
1640
+ "loss": 1.3963,
1641
+ "step": 860
1642
+ },
1643
+ {
1644
+ "epoch": 123.0,
1645
+ "eval_accuracy": 0.6538461538461539,
1646
+ "eval_loss": 2.3588593006134033,
1647
+ "eval_runtime": 2.0842,
1648
+ "eval_samples_per_second": 62.373,
1649
+ "eval_steps_per_second": 0.96,
1650
+ "step": 861
1651
+ },
1652
+ {
1653
+ "epoch": 124.0,
1654
+ "eval_accuracy": 0.6538461538461539,
1655
+ "eval_loss": 2.353787899017334,
1656
+ "eval_runtime": 2.1043,
1657
+ "eval_samples_per_second": 61.78,
1658
+ "eval_steps_per_second": 0.95,
1659
+ "step": 868
1660
+ },
1661
+ {
1662
+ "epoch": 124.29,
1663
+ "learning_rate": 1.9571428571428572e-05,
1664
+ "loss": 1.4087,
1665
+ "step": 870
1666
+ },
1667
+ {
1668
+ "epoch": 125.0,
1669
+ "eval_accuracy": 0.6461538461538462,
1670
+ "eval_loss": 2.326305627822876,
1671
+ "eval_runtime": 2.1627,
1672
+ "eval_samples_per_second": 60.11,
1673
+ "eval_steps_per_second": 0.925,
1674
+ "step": 875
1675
+ },
1676
+ {
1677
+ "epoch": 125.71,
1678
+ "learning_rate": 1.942857142857143e-05,
1679
+ "loss": 1.3564,
1680
+ "step": 880
1681
+ },
1682
+ {
1683
+ "epoch": 126.0,
1684
+ "eval_accuracy": 0.6230769230769231,
1685
+ "eval_loss": 2.3176610469818115,
1686
+ "eval_runtime": 2.2283,
1687
+ "eval_samples_per_second": 58.34,
1688
+ "eval_steps_per_second": 0.898,
1689
+ "step": 882
1690
+ },
1691
+ {
1692
+ "epoch": 127.0,
1693
+ "eval_accuracy": 0.6615384615384615,
1694
+ "eval_loss": 2.263662099838257,
1695
+ "eval_runtime": 2.1946,
1696
+ "eval_samples_per_second": 59.236,
1697
+ "eval_steps_per_second": 0.911,
1698
+ "step": 889
1699
+ },
1700
+ {
1701
+ "epoch": 127.14,
1702
+ "learning_rate": 1.928571428571429e-05,
1703
+ "loss": 1.373,
1704
+ "step": 890
1705
+ },
1706
+ {
1707
+ "epoch": 128.0,
1708
+ "eval_accuracy": 0.6538461538461539,
1709
+ "eval_loss": 2.2609002590179443,
1710
+ "eval_runtime": 2.3121,
1711
+ "eval_samples_per_second": 56.225,
1712
+ "eval_steps_per_second": 0.865,
1713
+ "step": 896
1714
+ },
1715
+ {
1716
+ "epoch": 128.57,
1717
+ "learning_rate": 1.9142857142857146e-05,
1718
+ "loss": 1.346,
1719
+ "step": 900
1720
+ },
1721
+ {
1722
+ "epoch": 129.0,
1723
+ "eval_accuracy": 0.6538461538461539,
1724
+ "eval_loss": 2.259404182434082,
1725
+ "eval_runtime": 2.1822,
1726
+ "eval_samples_per_second": 59.572,
1727
+ "eval_steps_per_second": 0.916,
1728
+ "step": 903
1729
+ },
1730
+ {
1731
+ "epoch": 130.0,
1732
+ "learning_rate": 1.9e-05,
1733
+ "loss": 1.3307,
1734
+ "step": 910
1735
+ },
1736
+ {
1737
+ "epoch": 130.0,
1738
+ "eval_accuracy": 0.6538461538461539,
1739
+ "eval_loss": 2.2564494609832764,
1740
+ "eval_runtime": 2.3375,
1741
+ "eval_samples_per_second": 55.615,
1742
+ "eval_steps_per_second": 0.856,
1743
+ "step": 910
1744
+ },
1745
+ {
1746
+ "epoch": 131.0,
1747
+ "eval_accuracy": 0.6307692307692307,
1748
+ "eval_loss": 2.2525382041931152,
1749
+ "eval_runtime": 2.2827,
1750
+ "eval_samples_per_second": 56.95,
1751
+ "eval_steps_per_second": 0.876,
1752
+ "step": 917
1753
+ },
1754
+ {
1755
+ "epoch": 131.43,
1756
+ "learning_rate": 1.885714285714286e-05,
1757
+ "loss": 1.3124,
1758
+ "step": 920
1759
+ },
1760
+ {
1761
+ "epoch": 132.0,
1762
+ "eval_accuracy": 0.6461538461538462,
1763
+ "eval_loss": 2.218500852584839,
1764
+ "eval_runtime": 2.2212,
1765
+ "eval_samples_per_second": 58.526,
1766
+ "eval_steps_per_second": 0.9,
1767
+ "step": 924
1768
+ },
1769
+ {
1770
+ "epoch": 132.86,
1771
+ "learning_rate": 1.8714285714285717e-05,
1772
+ "loss": 1.2848,
1773
+ "step": 930
1774
+ },
1775
+ {
1776
+ "epoch": 133.0,
1777
+ "eval_accuracy": 0.6461538461538462,
1778
+ "eval_loss": 2.2194907665252686,
1779
+ "eval_runtime": 2.3384,
1780
+ "eval_samples_per_second": 55.594,
1781
+ "eval_steps_per_second": 0.855,
1782
+ "step": 931
1783
+ },
1784
+ {
1785
+ "epoch": 134.0,
1786
+ "eval_accuracy": 0.6307692307692307,
1787
+ "eval_loss": 2.204814910888672,
1788
+ "eval_runtime": 2.2103,
1789
+ "eval_samples_per_second": 58.814,
1790
+ "eval_steps_per_second": 0.905,
1791
+ "step": 938
1792
+ },
1793
+ {
1794
+ "epoch": 134.29,
1795
+ "learning_rate": 1.8571428571428575e-05,
1796
+ "loss": 1.256,
1797
+ "step": 940
1798
+ },
1799
+ {
1800
+ "epoch": 135.0,
1801
+ "eval_accuracy": 0.6461538461538462,
1802
+ "eval_loss": 2.193894624710083,
1803
+ "eval_runtime": 2.2027,
1804
+ "eval_samples_per_second": 59.018,
1805
+ "eval_steps_per_second": 0.908,
1806
+ "step": 945
1807
+ },
1808
+ {
1809
+ "epoch": 135.71,
1810
+ "learning_rate": 1.842857142857143e-05,
1811
+ "loss": 1.24,
1812
+ "step": 950
1813
+ },
1814
+ {
1815
+ "epoch": 136.0,
1816
+ "eval_accuracy": 0.6461538461538462,
1817
+ "eval_loss": 2.1721882820129395,
1818
+ "eval_runtime": 2.3539,
1819
+ "eval_samples_per_second": 55.226,
1820
+ "eval_steps_per_second": 0.85,
1821
+ "step": 952
1822
+ },
1823
+ {
1824
+ "epoch": 137.0,
1825
+ "eval_accuracy": 0.6846153846153846,
1826
+ "eval_loss": 2.1497113704681396,
1827
+ "eval_runtime": 2.1949,
1828
+ "eval_samples_per_second": 59.23,
1829
+ "eval_steps_per_second": 0.911,
1830
+ "step": 959
1831
+ },
1832
+ {
1833
+ "epoch": 137.14,
1834
+ "learning_rate": 1.8285714285714288e-05,
1835
+ "loss": 1.2318,
1836
+ "step": 960
1837
+ },
1838
+ {
1839
+ "epoch": 138.0,
1840
+ "eval_accuracy": 0.6846153846153846,
1841
+ "eval_loss": 2.1332032680511475,
1842
+ "eval_runtime": 2.2346,
1843
+ "eval_samples_per_second": 58.176,
1844
+ "eval_steps_per_second": 0.895,
1845
+ "step": 966
1846
+ },
1847
+ {
1848
+ "epoch": 138.57,
1849
+ "learning_rate": 1.8142857142857146e-05,
1850
+ "loss": 1.1905,
1851
+ "step": 970
1852
+ },
1853
+ {
1854
+ "epoch": 139.0,
1855
+ "eval_accuracy": 0.6923076923076923,
1856
+ "eval_loss": 2.133913278579712,
1857
+ "eval_runtime": 2.1776,
1858
+ "eval_samples_per_second": 59.698,
1859
+ "eval_steps_per_second": 0.918,
1860
+ "step": 973
1861
+ },
1862
+ {
1863
+ "epoch": 140.0,
1864
+ "learning_rate": 1.8e-05,
1865
+ "loss": 1.186,
1866
+ "step": 980
1867
+ },
1868
+ {
1869
+ "epoch": 140.0,
1870
+ "eval_accuracy": 0.6692307692307692,
1871
+ "eval_loss": 2.1346120834350586,
1872
+ "eval_runtime": 2.3208,
1873
+ "eval_samples_per_second": 56.016,
1874
+ "eval_steps_per_second": 0.862,
1875
+ "step": 980
1876
+ },
1877
+ {
1878
+ "epoch": 141.0,
1879
+ "eval_accuracy": 0.7,
1880
+ "eval_loss": 2.092773914337158,
1881
+ "eval_runtime": 2.3738,
1882
+ "eval_samples_per_second": 54.764,
1883
+ "eval_steps_per_second": 0.843,
1884
+ "step": 987
1885
+ },
1886
+ {
1887
+ "epoch": 141.43,
1888
+ "learning_rate": 1.785714285714286e-05,
1889
+ "loss": 1.1759,
1890
+ "step": 990
1891
+ },
1892
+ {
1893
+ "epoch": 142.0,
1894
+ "eval_accuracy": 0.6846153846153846,
1895
+ "eval_loss": 2.1067099571228027,
1896
+ "eval_runtime": 2.356,
1897
+ "eval_samples_per_second": 55.178,
1898
+ "eval_steps_per_second": 0.849,
1899
+ "step": 994
1900
+ },
1901
+ {
1902
+ "epoch": 142.86,
1903
+ "learning_rate": 1.7714285714285717e-05,
1904
+ "loss": 1.132,
1905
+ "step": 1000
1906
+ },
1907
+ {
1908
+ "epoch": 143.0,
1909
+ "eval_accuracy": 0.6692307692307692,
1910
+ "eval_loss": 2.091548442840576,
1911
+ "eval_runtime": 2.3185,
1912
+ "eval_samples_per_second": 56.07,
1913
+ "eval_steps_per_second": 0.863,
1914
+ "step": 1001
1915
+ },
1916
+ {
1917
+ "epoch": 144.0,
1918
+ "eval_accuracy": 0.676923076923077,
1919
+ "eval_loss": 2.0903213024139404,
1920
+ "eval_runtime": 2.1804,
1921
+ "eval_samples_per_second": 59.622,
1922
+ "eval_steps_per_second": 0.917,
1923
+ "step": 1008
1924
+ },
1925
+ {
1926
+ "epoch": 144.29,
1927
+ "learning_rate": 1.757142857142857e-05,
1928
+ "loss": 1.1206,
1929
+ "step": 1010
1930
+ },
1931
+ {
1932
+ "epoch": 145.0,
1933
+ "eval_accuracy": 0.676923076923077,
1934
+ "eval_loss": 2.1074228286743164,
1935
+ "eval_runtime": 2.3685,
1936
+ "eval_samples_per_second": 54.887,
1937
+ "eval_steps_per_second": 0.844,
1938
+ "step": 1015
1939
+ },
1940
+ {
1941
+ "epoch": 145.71,
1942
+ "learning_rate": 1.742857142857143e-05,
1943
+ "loss": 1.1096,
1944
+ "step": 1020
1945
+ },
1946
+ {
1947
+ "epoch": 146.0,
1948
+ "eval_accuracy": 0.7230769230769231,
1949
+ "eval_loss": 2.0536632537841797,
1950
+ "eval_runtime": 2.5866,
1951
+ "eval_samples_per_second": 50.259,
1952
+ "eval_steps_per_second": 0.773,
1953
+ "step": 1022
1954
+ },
1955
+ {
1956
+ "epoch": 147.0,
1957
+ "eval_accuracy": 0.7,
1958
+ "eval_loss": 2.061009407043457,
1959
+ "eval_runtime": 2.3206,
1960
+ "eval_samples_per_second": 56.021,
1961
+ "eval_steps_per_second": 0.862,
1962
+ "step": 1029
1963
+ },
1964
+ {
1965
+ "epoch": 147.14,
1966
+ "learning_rate": 1.7285714285714287e-05,
1967
+ "loss": 1.0814,
1968
+ "step": 1030
1969
+ },
1970
+ {
1971
+ "epoch": 148.0,
1972
+ "eval_accuracy": 0.7076923076923077,
1973
+ "eval_loss": 2.0476534366607666,
1974
+ "eval_runtime": 2.1912,
1975
+ "eval_samples_per_second": 59.33,
1976
+ "eval_steps_per_second": 0.913,
1977
+ "step": 1036
1978
+ },
1979
+ {
1980
+ "epoch": 148.57,
1981
+ "learning_rate": 1.7142857142857142e-05,
1982
+ "loss": 1.0756,
1983
+ "step": 1040
1984
+ },
1985
+ {
1986
+ "epoch": 149.0,
1987
+ "eval_accuracy": 0.7076923076923077,
1988
+ "eval_loss": 2.0184433460235596,
1989
+ "eval_runtime": 2.2139,
1990
+ "eval_samples_per_second": 58.72,
1991
+ "eval_steps_per_second": 0.903,
1992
+ "step": 1043
1993
+ },
1994
+ {
1995
+ "epoch": 150.0,
1996
+ "learning_rate": 1.7e-05,
1997
+ "loss": 1.0638,
1998
+ "step": 1050
1999
+ },
2000
+ {
2001
+ "epoch": 150.0,
2002
+ "eval_accuracy": 0.6923076923076923,
2003
+ "eval_loss": 2.0189385414123535,
2004
+ "eval_runtime": 3.1458,
2005
+ "eval_samples_per_second": 41.325,
2006
+ "eval_steps_per_second": 0.636,
2007
+ "step": 1050
2008
+ },
2009
+ {
2010
+ "epoch": 151.0,
2011
+ "eval_accuracy": 0.7,
2012
+ "eval_loss": 2.01568865776062,
2013
+ "eval_runtime": 2.3668,
2014
+ "eval_samples_per_second": 54.926,
2015
+ "eval_steps_per_second": 0.845,
2016
+ "step": 1057
2017
+ },
2018
+ {
2019
+ "epoch": 151.43,
2020
+ "learning_rate": 1.6857142857142858e-05,
2021
+ "loss": 1.0343,
2022
+ "step": 1060
2023
+ },
2024
+ {
2025
+ "epoch": 152.0,
2026
+ "eval_accuracy": 0.6923076923076923,
2027
+ "eval_loss": 1.9978599548339844,
2028
+ "eval_runtime": 2.3546,
2029
+ "eval_samples_per_second": 55.211,
2030
+ "eval_steps_per_second": 0.849,
2031
+ "step": 1064
2032
+ },
2033
+ {
2034
+ "epoch": 152.86,
2035
+ "learning_rate": 1.6714285714285716e-05,
2036
+ "loss": 1.0187,
2037
+ "step": 1070
2038
+ },
2039
+ {
2040
+ "epoch": 153.0,
2041
+ "eval_accuracy": 0.7230769230769231,
2042
+ "eval_loss": 1.9824119806289673,
2043
+ "eval_runtime": 2.2036,
2044
+ "eval_samples_per_second": 58.995,
2045
+ "eval_steps_per_second": 0.908,
2046
+ "step": 1071
2047
+ },
2048
+ {
2049
+ "epoch": 154.0,
2050
+ "eval_accuracy": 0.7076923076923077,
2051
+ "eval_loss": 1.968167781829834,
2052
+ "eval_runtime": 2.422,
2053
+ "eval_samples_per_second": 53.675,
2054
+ "eval_steps_per_second": 0.826,
2055
+ "step": 1078
2056
+ },
2057
+ {
2058
+ "epoch": 154.29,
2059
+ "learning_rate": 1.6571428571428574e-05,
2060
+ "loss": 0.9993,
2061
+ "step": 1080
2062
+ },
2063
+ {
2064
+ "epoch": 155.0,
2065
+ "eval_accuracy": 0.7230769230769231,
2066
+ "eval_loss": 1.9394822120666504,
2067
+ "eval_runtime": 2.3562,
2068
+ "eval_samples_per_second": 55.175,
2069
+ "eval_steps_per_second": 0.849,
2070
+ "step": 1085
2071
+ },
2072
+ {
2073
+ "epoch": 155.71,
2074
+ "learning_rate": 1.642857142857143e-05,
2075
+ "loss": 1.0029,
2076
+ "step": 1090
2077
+ },
2078
+ {
2079
+ "epoch": 156.0,
2080
+ "eval_accuracy": 0.7153846153846154,
2081
+ "eval_loss": 1.920530080795288,
2082
+ "eval_runtime": 2.3989,
2083
+ "eval_samples_per_second": 54.191,
2084
+ "eval_steps_per_second": 0.834,
2085
+ "step": 1092
2086
+ },
2087
+ {
2088
+ "epoch": 157.0,
2089
+ "eval_accuracy": 0.7076923076923077,
2090
+ "eval_loss": 1.906773567199707,
2091
+ "eval_runtime": 2.3766,
2092
+ "eval_samples_per_second": 54.699,
2093
+ "eval_steps_per_second": 0.842,
2094
+ "step": 1099
2095
+ },
2096
+ {
2097
+ "epoch": 157.14,
2098
+ "learning_rate": 1.6285714285714287e-05,
2099
+ "loss": 0.9703,
2100
+ "step": 1100
2101
+ },
2102
+ {
2103
+ "epoch": 158.0,
2104
+ "eval_accuracy": 0.7384615384615385,
2105
+ "eval_loss": 1.899588704109192,
2106
+ "eval_runtime": 2.3516,
2107
+ "eval_samples_per_second": 55.282,
2108
+ "eval_steps_per_second": 0.85,
2109
+ "step": 1106
2110
+ },
2111
+ {
2112
+ "epoch": 158.57,
2113
+ "learning_rate": 1.6142857142857145e-05,
2114
+ "loss": 0.9532,
2115
+ "step": 1110
2116
+ },
2117
+ {
2118
+ "epoch": 159.0,
2119
+ "eval_accuracy": 0.7076923076923077,
2120
+ "eval_loss": 1.8944636583328247,
2121
+ "eval_runtime": 2.2108,
2122
+ "eval_samples_per_second": 58.803,
2123
+ "eval_steps_per_second": 0.905,
2124
+ "step": 1113
2125
+ },
2126
+ {
2127
+ "epoch": 160.0,
2128
+ "learning_rate": 1.6000000000000003e-05,
2129
+ "loss": 0.9526,
2130
+ "step": 1120
2131
+ },
2132
+ {
2133
+ "epoch": 160.0,
2134
+ "eval_accuracy": 0.7307692307692307,
2135
+ "eval_loss": 1.8939263820648193,
2136
+ "eval_runtime": 2.2202,
2137
+ "eval_samples_per_second": 58.553,
2138
+ "eval_steps_per_second": 0.901,
2139
+ "step": 1120
2140
+ },
2141
+ {
2142
+ "epoch": 161.0,
2143
+ "eval_accuracy": 0.7230769230769231,
2144
+ "eval_loss": 1.8937941789627075,
2145
+ "eval_runtime": 2.3259,
2146
+ "eval_samples_per_second": 55.892,
2147
+ "eval_steps_per_second": 0.86,
2148
+ "step": 1127
2149
+ },
2150
+ {
2151
+ "epoch": 161.43,
2152
+ "learning_rate": 1.5857142857142857e-05,
2153
+ "loss": 0.9365,
2154
+ "step": 1130
2155
+ },
2156
+ {
2157
+ "epoch": 162.0,
2158
+ "eval_accuracy": 0.7384615384615385,
2159
+ "eval_loss": 1.8544682264328003,
2160
+ "eval_runtime": 2.1829,
2161
+ "eval_samples_per_second": 59.554,
2162
+ "eval_steps_per_second": 0.916,
2163
+ "step": 1134
2164
+ },
2165
+ {
2166
+ "epoch": 162.86,
2167
+ "learning_rate": 1.5714285714285715e-05,
2168
+ "loss": 0.9301,
2169
+ "step": 1140
2170
+ },
2171
+ {
2172
+ "epoch": 163.0,
2173
+ "eval_accuracy": 0.7384615384615385,
2174
+ "eval_loss": 1.860898733139038,
2175
+ "eval_runtime": 2.2861,
2176
+ "eval_samples_per_second": 56.864,
2177
+ "eval_steps_per_second": 0.875,
2178
+ "step": 1141
2179
+ },
2180
+ {
2181
+ "epoch": 164.0,
2182
+ "eval_accuracy": 0.7230769230769231,
2183
+ "eval_loss": 1.8540517091751099,
2184
+ "eval_runtime": 2.2092,
2185
+ "eval_samples_per_second": 58.844,
2186
+ "eval_steps_per_second": 0.905,
2187
+ "step": 1148
2188
+ },
2189
+ {
2190
+ "epoch": 164.29,
2191
+ "learning_rate": 1.5571428571428573e-05,
2192
+ "loss": 0.8856,
2193
+ "step": 1150
2194
+ },
2195
+ {
2196
+ "epoch": 165.0,
2197
+ "eval_accuracy": 0.7076923076923077,
2198
+ "eval_loss": 1.884601354598999,
2199
+ "eval_runtime": 2.2447,
2200
+ "eval_samples_per_second": 57.915,
2201
+ "eval_steps_per_second": 0.891,
2202
+ "step": 1155
2203
+ },
2204
+ {
2205
+ "epoch": 165.71,
2206
+ "learning_rate": 1.542857142857143e-05,
2207
+ "loss": 0.8801,
2208
+ "step": 1160
2209
+ },
2210
+ {
2211
+ "epoch": 166.0,
2212
+ "eval_accuracy": 0.7076923076923077,
2213
+ "eval_loss": 1.867126226425171,
2214
+ "eval_runtime": 2.2028,
2215
+ "eval_samples_per_second": 59.017,
2216
+ "eval_steps_per_second": 0.908,
2217
+ "step": 1162
2218
+ },
2219
+ {
2220
+ "epoch": 167.0,
2221
+ "eval_accuracy": 0.7230769230769231,
2222
+ "eval_loss": 1.8465133905410767,
2223
+ "eval_runtime": 2.3398,
2224
+ "eval_samples_per_second": 55.559,
2225
+ "eval_steps_per_second": 0.855,
2226
+ "step": 1169
2227
+ },
2228
+ {
2229
+ "epoch": 167.14,
2230
+ "learning_rate": 1.5285714285714286e-05,
2231
+ "loss": 0.8898,
2232
+ "step": 1170
2233
+ },
2234
+ {
2235
+ "epoch": 168.0,
2236
+ "eval_accuracy": 0.7230769230769231,
2237
+ "eval_loss": 1.8428664207458496,
2238
+ "eval_runtime": 2.1983,
2239
+ "eval_samples_per_second": 59.138,
2240
+ "eval_steps_per_second": 0.91,
2241
+ "step": 1176
2242
+ },
2243
+ {
2244
+ "epoch": 168.57,
2245
+ "learning_rate": 1.5142857142857144e-05,
2246
+ "loss": 0.8729,
2247
+ "step": 1180
2248
+ },
2249
+ {
2250
+ "epoch": 169.0,
2251
+ "eval_accuracy": 0.7384615384615385,
2252
+ "eval_loss": 1.832564115524292,
2253
+ "eval_runtime": 2.357,
2254
+ "eval_samples_per_second": 55.155,
2255
+ "eval_steps_per_second": 0.849,
2256
+ "step": 1183
2257
+ },
2258
+ {
2259
+ "epoch": 170.0,
2260
+ "learning_rate": 1.5000000000000002e-05,
2261
+ "loss": 0.8632,
2262
+ "step": 1190
2263
+ },
2264
+ {
2265
+ "epoch": 170.0,
2266
+ "eval_accuracy": 0.7461538461538462,
2267
+ "eval_loss": 1.810370683670044,
2268
+ "eval_runtime": 2.4307,
2269
+ "eval_samples_per_second": 53.483,
2270
+ "eval_steps_per_second": 0.823,
2271
+ "step": 1190
2272
+ },
2273
+ {
2274
+ "epoch": 171.0,
2275
+ "eval_accuracy": 0.7384615384615385,
2276
+ "eval_loss": 1.8017692565917969,
2277
+ "eval_runtime": 2.2052,
2278
+ "eval_samples_per_second": 58.951,
2279
+ "eval_steps_per_second": 0.907,
2280
+ "step": 1197
2281
+ },
2282
+ {
2283
+ "epoch": 171.43,
2284
+ "learning_rate": 1.4857142857142858e-05,
2285
+ "loss": 0.8423,
2286
+ "step": 1200
2287
+ },
2288
+ {
2289
+ "epoch": 172.0,
2290
+ "eval_accuracy": 0.7615384615384615,
2291
+ "eval_loss": 1.7875727415084839,
2292
+ "eval_runtime": 2.3658,
2293
+ "eval_samples_per_second": 54.949,
2294
+ "eval_steps_per_second": 0.845,
2295
+ "step": 1204
2296
+ },
2297
+ {
2298
+ "epoch": 172.86,
2299
+ "learning_rate": 1.4714285714285716e-05,
2300
+ "loss": 0.828,
2301
+ "step": 1210
2302
+ },
2303
+ {
2304
+ "epoch": 173.0,
2305
+ "eval_accuracy": 0.7538461538461538,
2306
+ "eval_loss": 1.787375569343567,
2307
+ "eval_runtime": 2.2488,
2308
+ "eval_samples_per_second": 57.808,
2309
+ "eval_steps_per_second": 0.889,
2310
+ "step": 1211
2311
+ },
2312
+ {
2313
+ "epoch": 174.0,
2314
+ "eval_accuracy": 0.7692307692307693,
2315
+ "eval_loss": 1.7785577774047852,
2316
+ "eval_runtime": 2.3295,
2317
+ "eval_samples_per_second": 55.807,
2318
+ "eval_steps_per_second": 0.859,
2319
+ "step": 1218
2320
+ },
2321
+ {
2322
+ "epoch": 174.29,
2323
+ "learning_rate": 1.4571428571428573e-05,
2324
+ "loss": 0.8063,
2325
+ "step": 1220
2326
+ },
2327
+ {
2328
+ "epoch": 175.0,
2329
+ "eval_accuracy": 0.7461538461538462,
2330
+ "eval_loss": 1.7820396423339844,
2331
+ "eval_runtime": 2.2553,
2332
+ "eval_samples_per_second": 57.642,
2333
+ "eval_steps_per_second": 0.887,
2334
+ "step": 1225
2335
+ },
2336
+ {
2337
+ "epoch": 175.71,
2338
+ "learning_rate": 1.4428571428571429e-05,
2339
+ "loss": 0.8084,
2340
+ "step": 1230
2341
+ },
2342
+ {
2343
+ "epoch": 176.0,
2344
+ "eval_accuracy": 0.7538461538461538,
2345
+ "eval_loss": 1.800405502319336,
2346
+ "eval_runtime": 2.303,
2347
+ "eval_samples_per_second": 56.447,
2348
+ "eval_steps_per_second": 0.868,
2349
+ "step": 1232
2350
+ },
2351
+ {
2352
+ "epoch": 177.0,
2353
+ "eval_accuracy": 0.7615384615384615,
2354
+ "eval_loss": 1.787461757659912,
2355
+ "eval_runtime": 2.2538,
2356
+ "eval_samples_per_second": 57.679,
2357
+ "eval_steps_per_second": 0.887,
2358
+ "step": 1239
2359
+ },
2360
+ {
2361
+ "epoch": 177.14,
2362
+ "learning_rate": 1.4285714285714287e-05,
2363
+ "loss": 0.7948,
2364
+ "step": 1240
2365
+ },
2366
+ {
2367
+ "epoch": 178.0,
2368
+ "eval_accuracy": 0.7615384615384615,
2369
+ "eval_loss": 1.7639268636703491,
2370
+ "eval_runtime": 2.3516,
2371
+ "eval_samples_per_second": 55.281,
2372
+ "eval_steps_per_second": 0.85,
2373
+ "step": 1246
2374
+ },
2375
+ {
2376
+ "epoch": 178.57,
2377
+ "learning_rate": 1.4142857142857145e-05,
2378
+ "loss": 0.7687,
2379
+ "step": 1250
2380
+ },
2381
+ {
2382
+ "epoch": 179.0,
2383
+ "eval_accuracy": 0.7538461538461538,
2384
+ "eval_loss": 1.7797616720199585,
2385
+ "eval_runtime": 2.2016,
2386
+ "eval_samples_per_second": 59.047,
2387
+ "eval_steps_per_second": 0.908,
2388
+ "step": 1253
2389
+ },
2390
+ {
2391
+ "epoch": 180.0,
2392
+ "learning_rate": 1.4e-05,
2393
+ "loss": 0.7598,
2394
+ "step": 1260
2395
+ },
2396
+ {
2397
+ "epoch": 180.0,
2398
+ "eval_accuracy": 0.7538461538461538,
2399
+ "eval_loss": 1.7775954008102417,
2400
+ "eval_runtime": 2.2652,
2401
+ "eval_samples_per_second": 57.389,
2402
+ "eval_steps_per_second": 0.883,
2403
+ "step": 1260
2404
+ },
2405
+ {
2406
+ "epoch": 181.0,
2407
+ "eval_accuracy": 0.7384615384615385,
2408
+ "eval_loss": 1.7582831382751465,
2409
+ "eval_runtime": 2.3301,
2410
+ "eval_samples_per_second": 55.792,
2411
+ "eval_steps_per_second": 0.858,
2412
+ "step": 1267
2413
+ },
2414
+ {
2415
+ "epoch": 181.43,
2416
+ "learning_rate": 1.3857142857142858e-05,
2417
+ "loss": 0.7622,
2418
+ "step": 1270
2419
+ },
2420
+ {
2421
+ "epoch": 182.0,
2422
+ "eval_accuracy": 0.7538461538461538,
2423
+ "eval_loss": 1.7546014785766602,
2424
+ "eval_runtime": 2.3427,
2425
+ "eval_samples_per_second": 55.493,
2426
+ "eval_steps_per_second": 0.854,
2427
+ "step": 1274
2428
+ },
2429
+ {
2430
+ "epoch": 182.86,
2431
+ "learning_rate": 1.3714285714285716e-05,
2432
+ "loss": 0.754,
2433
+ "step": 1280
2434
+ },
2435
+ {
2436
+ "epoch": 183.0,
2437
+ "eval_accuracy": 0.7615384615384615,
2438
+ "eval_loss": 1.7243136167526245,
2439
+ "eval_runtime": 2.2386,
2440
+ "eval_samples_per_second": 58.071,
2441
+ "eval_steps_per_second": 0.893,
2442
+ "step": 1281
2443
+ },
2444
+ {
2445
+ "epoch": 184.0,
2446
+ "eval_accuracy": 0.7538461538461538,
2447
+ "eval_loss": 1.7218064069747925,
2448
+ "eval_runtime": 2.3487,
2449
+ "eval_samples_per_second": 55.35,
2450
+ "eval_steps_per_second": 0.852,
2451
+ "step": 1288
2452
+ },
2453
+ {
2454
+ "epoch": 184.29,
2455
+ "learning_rate": 1.3571428571428574e-05,
2456
+ "loss": 0.7397,
2457
+ "step": 1290
2458
+ },
2459
+ {
2460
+ "epoch": 185.0,
2461
+ "eval_accuracy": 0.7538461538461538,
2462
+ "eval_loss": 1.6983821392059326,
2463
+ "eval_runtime": 2.2077,
2464
+ "eval_samples_per_second": 58.886,
2465
+ "eval_steps_per_second": 0.906,
2466
+ "step": 1295
2467
+ },
2468
+ {
2469
+ "epoch": 185.71,
2470
+ "learning_rate": 1.3428571428571429e-05,
2471
+ "loss": 0.7171,
2472
+ "step": 1300
2473
+ },
2474
+ {
2475
+ "epoch": 186.0,
2476
+ "eval_accuracy": 0.7461538461538462,
2477
+ "eval_loss": 1.6678060293197632,
2478
+ "eval_runtime": 2.3645,
2479
+ "eval_samples_per_second": 54.98,
2480
+ "eval_steps_per_second": 0.846,
2481
+ "step": 1302
2482
+ },
2483
+ {
2484
+ "epoch": 187.0,
2485
+ "eval_accuracy": 0.7692307692307693,
2486
+ "eval_loss": 1.6591731309890747,
2487
+ "eval_runtime": 2.3411,
2488
+ "eval_samples_per_second": 55.529,
2489
+ "eval_steps_per_second": 0.854,
2490
+ "step": 1309
2491
+ },
2492
+ {
2493
+ "epoch": 187.14,
2494
+ "learning_rate": 1.3285714285714287e-05,
2495
+ "loss": 0.7351,
2496
+ "step": 1310
2497
+ },
2498
+ {
2499
+ "epoch": 188.0,
2500
+ "eval_accuracy": 0.7538461538461538,
2501
+ "eval_loss": 1.67545747756958,
2502
+ "eval_runtime": 2.2255,
2503
+ "eval_samples_per_second": 58.414,
2504
+ "eval_steps_per_second": 0.899,
2505
+ "step": 1316
2506
+ },
2507
+ {
2508
+ "epoch": 188.57,
2509
+ "learning_rate": 1.3142857142857145e-05,
2510
+ "loss": 0.717,
2511
+ "step": 1320
2512
+ },
2513
+ {
2514
+ "epoch": 189.0,
2515
+ "eval_accuracy": 0.7846153846153846,
2516
+ "eval_loss": 1.6684316396713257,
2517
+ "eval_runtime": 2.2461,
2518
+ "eval_samples_per_second": 57.879,
2519
+ "eval_steps_per_second": 0.89,
2520
+ "step": 1323
2521
+ },
2522
+ {
2523
+ "epoch": 190.0,
2524
+ "learning_rate": 1.3000000000000001e-05,
2525
+ "loss": 0.703,
2526
+ "step": 1330
2527
+ },
2528
+ {
2529
+ "epoch": 190.0,
2530
+ "eval_accuracy": 0.7692307692307693,
2531
+ "eval_loss": 1.6624796390533447,
2532
+ "eval_runtime": 2.3197,
2533
+ "eval_samples_per_second": 56.043,
2534
+ "eval_steps_per_second": 0.862,
2535
+ "step": 1330
2536
+ },
2537
+ {
2538
+ "epoch": 191.0,
2539
+ "eval_accuracy": 0.7769230769230769,
2540
+ "eval_loss": 1.6624727249145508,
2541
+ "eval_runtime": 2.2578,
2542
+ "eval_samples_per_second": 57.577,
2543
+ "eval_steps_per_second": 0.886,
2544
+ "step": 1337
2545
+ },
2546
+ {
2547
+ "epoch": 191.43,
2548
+ "learning_rate": 1.2857142857142859e-05,
2549
+ "loss": 0.7036,
2550
+ "step": 1340
2551
+ },
2552
+ {
2553
+ "epoch": 192.0,
2554
+ "eval_accuracy": 0.7615384615384615,
2555
+ "eval_loss": 1.6422407627105713,
2556
+ "eval_runtime": 2.4009,
2557
+ "eval_samples_per_second": 54.146,
2558
+ "eval_steps_per_second": 0.833,
2559
+ "step": 1344
2560
+ },
2561
+ {
2562
+ "epoch": 192.86,
2563
+ "learning_rate": 1.2714285714285715e-05,
2564
+ "loss": 0.698,
2565
+ "step": 1350
2566
+ },
2567
+ {
2568
+ "epoch": 193.0,
2569
+ "eval_accuracy": 0.7538461538461538,
2570
+ "eval_loss": 1.6632704734802246,
2571
+ "eval_runtime": 2.2565,
2572
+ "eval_samples_per_second": 57.612,
2573
+ "eval_steps_per_second": 0.886,
2574
+ "step": 1351
2575
+ },
2576
+ {
2577
+ "epoch": 194.0,
2578
+ "eval_accuracy": 0.7615384615384615,
2579
+ "eval_loss": 1.663210153579712,
2580
+ "eval_runtime": 2.3463,
2581
+ "eval_samples_per_second": 55.406,
2582
+ "eval_steps_per_second": 0.852,
2583
+ "step": 1358
2584
+ },
2585
+ {
2586
+ "epoch": 194.29,
2587
+ "learning_rate": 1.2571428571428572e-05,
2588
+ "loss": 0.7002,
2589
+ "step": 1360
2590
+ },
2591
+ {
2592
+ "epoch": 195.0,
2593
+ "eval_accuracy": 0.7538461538461538,
2594
+ "eval_loss": 1.6452277898788452,
2595
+ "eval_runtime": 2.2223,
2596
+ "eval_samples_per_second": 58.497,
2597
+ "eval_steps_per_second": 0.9,
2598
+ "step": 1365
2599
+ },
2600
+ {
2601
+ "epoch": 195.71,
2602
+ "learning_rate": 1.242857142857143e-05,
2603
+ "loss": 0.6662,
2604
+ "step": 1370
2605
+ },
2606
+ {
2607
+ "epoch": 196.0,
2608
+ "eval_accuracy": 0.7615384615384615,
2609
+ "eval_loss": 1.6403251886367798,
2610
+ "eval_runtime": 2.3447,
2611
+ "eval_samples_per_second": 55.444,
2612
+ "eval_steps_per_second": 0.853,
2613
+ "step": 1372
2614
+ },
2615
+ {
2616
+ "epoch": 197.0,
2617
+ "eval_accuracy": 0.7538461538461538,
2618
+ "eval_loss": 1.627591848373413,
2619
+ "eval_runtime": 2.3495,
2620
+ "eval_samples_per_second": 55.332,
2621
+ "eval_steps_per_second": 0.851,
2622
+ "step": 1379
2623
+ },
2624
+ {
2625
+ "epoch": 197.14,
2626
+ "learning_rate": 1.2285714285714288e-05,
2627
+ "loss": 0.6771,
2628
+ "step": 1380
2629
+ },
2630
+ {
2631
+ "epoch": 198.0,
2632
+ "eval_accuracy": 0.7769230769230769,
2633
+ "eval_loss": 1.6097954511642456,
2634
+ "eval_runtime": 2.2213,
2635
+ "eval_samples_per_second": 58.524,
2636
+ "eval_steps_per_second": 0.9,
2637
+ "step": 1386
2638
+ },
2639
+ {
2640
+ "epoch": 198.57,
2641
+ "learning_rate": 1.2142857142857142e-05,
2642
+ "loss": 0.6588,
2643
+ "step": 1390
2644
+ },
2645
+ {
2646
+ "epoch": 199.0,
2647
+ "eval_accuracy": 0.7692307692307693,
2648
+ "eval_loss": 1.622355580329895,
2649
+ "eval_runtime": 2.2376,
2650
+ "eval_samples_per_second": 58.098,
2651
+ "eval_steps_per_second": 0.894,
2652
+ "step": 1393
2653
+ },
2654
+ {
2655
+ "epoch": 200.0,
2656
+ "learning_rate": 1.2e-05,
2657
+ "loss": 0.6549,
2658
+ "step": 1400
2659
+ },
2660
+ {
2661
+ "epoch": 200.0,
2662
+ "eval_accuracy": 0.7461538461538462,
2663
+ "eval_loss": 1.6263408660888672,
2664
+ "eval_runtime": 2.3312,
2665
+ "eval_samples_per_second": 55.765,
2666
+ "eval_steps_per_second": 0.858,
2667
+ "step": 1400
2668
+ },
2669
+ {
2670
+ "epoch": 200.0,
2671
+ "step": 1400,
2672
+ "total_flos": 8.062778508115968e+18,
2673
+ "train_loss": 0.39147548845836094,
2674
+ "train_runtime": 3403.5317,
2675
+ "train_samples_per_second": 30.556,
2676
+ "train_steps_per_second": 0.411
2677
  }
2678
  ],
2679
  "logging_steps": 10,
2680
+ "max_steps": 1400,
2681
  "num_input_tokens_seen": 0,
2682
+ "num_train_epochs": 200,
2683
  "save_steps": 500,
2684
+ "total_flos": 8.062778508115968e+18,
2685
  "train_batch_size": 80,
2686
  "trial_name": null,
2687
  "trial_params": null