qubvel-hf HF staff committed on
Commit
299c056
1 Parent(s): d603799

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: timm/resnet18.a1_in1k
5
  tags:
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # vit-base-beans
18
 
19
- This model is a fine-tuned version of [timm/resnet18.a1_in1k](https://huggingface.co/timm/resnet18.a1_in1k) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.6875
22
  - Accuracy: 0.8647
 
3
  license: apache-2.0
4
  base_model: timm/resnet18.a1_in1k
5
  tags:
6
+ - image-classification
7
+ - vision
8
  - generated_from_trainer
9
  metrics:
10
  - accuracy
 
18
 
19
  # vit-base-beans
20
 
21
+ This model is a fine-tuned version of [timm/resnet18.a1_in1k](https://huggingface.co/timm/resnet18.a1_in1k) on the beans dataset.
22
  It achieves the following results on the evaluation set:
23
  - Loss: 0.6875
24
  - Accuracy: 0.8647
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.8120300751879699,
4
- "eval_loss": 0.7412300109863281,
5
- "eval_runtime": 0.7631,
6
- "eval_samples_per_second": 174.289,
7
- "eval_steps_per_second": 22.278,
8
- "total_flos": 1.5658365504595968e+17,
9
- "train_loss": 0.9236146088135548,
10
- "train_runtime": 142.953,
11
- "train_samples_per_second": 108.497,
12
- "train_steps_per_second": 13.641
13
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.8646616541353384,
4
+ "eval_loss": 0.6874601244926453,
5
+ "eval_runtime": 0.7676,
6
+ "eval_samples_per_second": 173.27,
7
+ "eval_steps_per_second": 22.147,
8
+ "total_flos": 2.0877820672794624e+17,
9
+ "train_loss": 0.19350949709232038,
10
+ "train_runtime": 49.806,
11
+ "train_samples_per_second": 415.211,
12
+ "train_steps_per_second": 52.203
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.8120300751879699,
4
- "eval_loss": 0.7412300109863281,
5
- "eval_runtime": 0.7631,
6
- "eval_samples_per_second": 174.289,
7
- "eval_steps_per_second": 22.278
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.8646616541353384,
4
+ "eval_loss": 0.6874601244926453,
5
+ "eval_runtime": 0.7676,
6
+ "eval_samples_per_second": 173.27,
7
+ "eval_steps_per_second": 22.147
8
  }
runs/Nov19_22-52-51_ip-10-90-1-182/events.out.tfevents.1732056828.ip-10-90-1-182.3356820.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc3dc72508c8ab2a05a0211d3b7e188a486645e03e73915f4ba266fcee25a663
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 15.0,
3
- "total_flos": 1.5658365504595968e+17,
4
- "train_loss": 0.9236146088135548,
5
- "train_runtime": 142.953,
6
- "train_samples_per_second": 108.497,
7
- "train_steps_per_second": 13.641
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 2.0877820672794624e+17,
4
+ "train_loss": 0.19350949709232038,
5
+ "train_runtime": 49.806,
6
+ "train_samples_per_second": 415.211,
7
+ "train_steps_per_second": 52.203
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7412300109863281,
3
- "best_model_checkpoint": "./beans_outputs/checkpoint-1950",
4
- "epoch": 15.0,
5
  "eval_steps": 500,
6
- "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1509,19 +1509,519 @@
1509
  "step": 1950
1510
  },
1511
  {
1512
- "epoch": 15.0,
1513
- "step": 1950,
1514
- "total_flos": 1.5658365504595968e+17,
1515
- "train_loss": 0.9236146088135548,
1516
- "train_runtime": 142.953,
1517
- "train_samples_per_second": 108.497,
1518
- "train_steps_per_second": 13.641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1519
  }
1520
  ],
1521
  "logging_steps": 10,
1522
- "max_steps": 1950,
1523
  "num_input_tokens_seen": 0,
1524
- "num_train_epochs": 15,
1525
  "save_steps": 500,
1526
  "stateful_callbacks": {
1527
  "TrainerControl": {
@@ -1535,7 +2035,7 @@
1535
  "attributes": {}
1536
  }
1537
  },
1538
- "total_flos": 1.5658365504595968e+17,
1539
  "train_batch_size": 8,
1540
  "trial_name": null,
1541
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6874601244926453,
3
+ "best_model_checkpoint": "./beans_outputs/checkpoint-2600",
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 2600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1509
  "step": 1950
1510
  },
1511
  {
1512
+ "epoch": 15.076923076923077,
1513
+ "grad_norm": 3.3559834957122803,
1514
+ "learning_rate": 4.923076923076924e-06,
1515
+ "loss": 0.7535,
1516
+ "step": 1960
1517
+ },
1518
+ {
1519
+ "epoch": 15.153846153846153,
1520
+ "grad_norm": 1.8613739013671875,
1521
+ "learning_rate": 4.8461538461538465e-06,
1522
+ "loss": 0.7581,
1523
+ "step": 1970
1524
+ },
1525
+ {
1526
+ "epoch": 15.23076923076923,
1527
+ "grad_norm": 2.3707966804504395,
1528
+ "learning_rate": 4.76923076923077e-06,
1529
+ "loss": 0.7836,
1530
+ "step": 1980
1531
+ },
1532
+ {
1533
+ "epoch": 15.307692307692308,
1534
+ "grad_norm": 2.6265199184417725,
1535
+ "learning_rate": 4.692307692307693e-06,
1536
+ "loss": 0.8334,
1537
+ "step": 1990
1538
+ },
1539
+ {
1540
+ "epoch": 15.384615384615385,
1541
+ "grad_norm": 2.078848123550415,
1542
+ "learning_rate": 4.615384615384616e-06,
1543
+ "loss": 0.7772,
1544
+ "step": 2000
1545
+ },
1546
+ {
1547
+ "epoch": 15.461538461538462,
1548
+ "grad_norm": 2.6433162689208984,
1549
+ "learning_rate": 4.538461538461539e-06,
1550
+ "loss": 0.7955,
1551
+ "step": 2010
1552
+ },
1553
+ {
1554
+ "epoch": 15.538461538461538,
1555
+ "grad_norm": 3.458962917327881,
1556
+ "learning_rate": 4.461538461538462e-06,
1557
+ "loss": 0.787,
1558
+ "step": 2020
1559
+ },
1560
+ {
1561
+ "epoch": 15.615384615384615,
1562
+ "grad_norm": 5.090147495269775,
1563
+ "learning_rate": 4.384615384615385e-06,
1564
+ "loss": 0.7875,
1565
+ "step": 2030
1566
+ },
1567
+ {
1568
+ "epoch": 15.692307692307692,
1569
+ "grad_norm": 1.9066407680511475,
1570
+ "learning_rate": 4.307692307692308e-06,
1571
+ "loss": 0.7764,
1572
+ "step": 2040
1573
+ },
1574
+ {
1575
+ "epoch": 15.76923076923077,
1576
+ "grad_norm": 3.097341299057007,
1577
+ "learning_rate": 4.230769230769231e-06,
1578
+ "loss": 0.7335,
1579
+ "step": 2050
1580
+ },
1581
+ {
1582
+ "epoch": 15.846153846153847,
1583
+ "grad_norm": 2.7201600074768066,
1584
+ "learning_rate": 4.1538461538461545e-06,
1585
+ "loss": 0.7747,
1586
+ "step": 2060
1587
+ },
1588
+ {
1589
+ "epoch": 15.923076923076923,
1590
+ "grad_norm": 2.303032398223877,
1591
+ "learning_rate": 4.076923076923077e-06,
1592
+ "loss": 0.7738,
1593
+ "step": 2070
1594
+ },
1595
+ {
1596
+ "epoch": 16.0,
1597
+ "grad_norm": 4.420492172241211,
1598
+ "learning_rate": 4.000000000000001e-06,
1599
+ "loss": 0.774,
1600
+ "step": 2080
1601
+ },
1602
+ {
1603
+ "epoch": 16.0,
1604
+ "eval_accuracy": 0.849624060150376,
1605
+ "eval_loss": 0.7370420694351196,
1606
+ "eval_runtime": 0.776,
1607
+ "eval_samples_per_second": 171.384,
1608
+ "eval_steps_per_second": 21.906,
1609
+ "step": 2080
1610
+ },
1611
+ {
1612
+ "epoch": 16.076923076923077,
1613
+ "grad_norm": 3.9969003200531006,
1614
+ "learning_rate": 3.923076923076923e-06,
1615
+ "loss": 0.8316,
1616
+ "step": 2090
1617
+ },
1618
+ {
1619
+ "epoch": 16.153846153846153,
1620
+ "grad_norm": 2.3731822967529297,
1621
+ "learning_rate": 3.846153846153847e-06,
1622
+ "loss": 0.8162,
1623
+ "step": 2100
1624
+ },
1625
+ {
1626
+ "epoch": 16.23076923076923,
1627
+ "grad_norm": 2.232074737548828,
1628
+ "learning_rate": 3.7692307692307694e-06,
1629
+ "loss": 0.8138,
1630
+ "step": 2110
1631
+ },
1632
+ {
1633
+ "epoch": 16.307692307692307,
1634
+ "grad_norm": 2.8799118995666504,
1635
+ "learning_rate": 3.692307692307693e-06,
1636
+ "loss": 0.8434,
1637
+ "step": 2120
1638
+ },
1639
+ {
1640
+ "epoch": 16.384615384615383,
1641
+ "grad_norm": 2.2093818187713623,
1642
+ "learning_rate": 3.6153846153846156e-06,
1643
+ "loss": 0.7886,
1644
+ "step": 2130
1645
+ },
1646
+ {
1647
+ "epoch": 16.46153846153846,
1648
+ "grad_norm": 1.984840750694275,
1649
+ "learning_rate": 3.538461538461539e-06,
1650
+ "loss": 0.7682,
1651
+ "step": 2140
1652
+ },
1653
+ {
1654
+ "epoch": 16.53846153846154,
1655
+ "grad_norm": 2.711601495742798,
1656
+ "learning_rate": 3.4615384615384617e-06,
1657
+ "loss": 0.7471,
1658
+ "step": 2150
1659
+ },
1660
+ {
1661
+ "epoch": 16.615384615384617,
1662
+ "grad_norm": 2.130311965942383,
1663
+ "learning_rate": 3.384615384615385e-06,
1664
+ "loss": 0.7535,
1665
+ "step": 2160
1666
+ },
1667
+ {
1668
+ "epoch": 16.692307692307693,
1669
+ "grad_norm": 2.327207565307617,
1670
+ "learning_rate": 3.307692307692308e-06,
1671
+ "loss": 0.718,
1672
+ "step": 2170
1673
+ },
1674
+ {
1675
+ "epoch": 16.76923076923077,
1676
+ "grad_norm": 2.198944091796875,
1677
+ "learning_rate": 3.2307692307692313e-06,
1678
+ "loss": 0.8146,
1679
+ "step": 2180
1680
+ },
1681
+ {
1682
+ "epoch": 16.846153846153847,
1683
+ "grad_norm": 2.388453483581543,
1684
+ "learning_rate": 3.153846153846154e-06,
1685
+ "loss": 0.8368,
1686
+ "step": 2190
1687
+ },
1688
+ {
1689
+ "epoch": 16.923076923076923,
1690
+ "grad_norm": 2.2575690746307373,
1691
+ "learning_rate": 3.0769230769230774e-06,
1692
+ "loss": 0.749,
1693
+ "step": 2200
1694
+ },
1695
+ {
1696
+ "epoch": 17.0,
1697
+ "grad_norm": 6.020498275756836,
1698
+ "learning_rate": 3e-06,
1699
+ "loss": 0.7613,
1700
+ "step": 2210
1701
+ },
1702
+ {
1703
+ "epoch": 17.0,
1704
+ "eval_accuracy": 0.849624060150376,
1705
+ "eval_loss": 0.7059224247932434,
1706
+ "eval_runtime": 0.7496,
1707
+ "eval_samples_per_second": 177.439,
1708
+ "eval_steps_per_second": 22.68,
1709
+ "step": 2210
1710
+ },
1711
+ {
1712
+ "epoch": 17.076923076923077,
1713
+ "grad_norm": 3.134481430053711,
1714
+ "learning_rate": 2.9230769230769236e-06,
1715
+ "loss": 0.7609,
1716
+ "step": 2220
1717
+ },
1718
+ {
1719
+ "epoch": 17.153846153846153,
1720
+ "grad_norm": 2.0070559978485107,
1721
+ "learning_rate": 2.846153846153846e-06,
1722
+ "loss": 0.7483,
1723
+ "step": 2230
1724
+ },
1725
+ {
1726
+ "epoch": 17.23076923076923,
1727
+ "grad_norm": 3.491682291030884,
1728
+ "learning_rate": 2.7692307692307697e-06,
1729
+ "loss": 0.7696,
1730
+ "step": 2240
1731
+ },
1732
+ {
1733
+ "epoch": 17.307692307692307,
1734
+ "grad_norm": 1.9866397380828857,
1735
+ "learning_rate": 2.6923076923076923e-06,
1736
+ "loss": 0.7609,
1737
+ "step": 2250
1738
+ },
1739
+ {
1740
+ "epoch": 17.384615384615383,
1741
+ "grad_norm": 3.458582878112793,
1742
+ "learning_rate": 2.615384615384616e-06,
1743
+ "loss": 0.7813,
1744
+ "step": 2260
1745
+ },
1746
+ {
1747
+ "epoch": 17.46153846153846,
1748
+ "grad_norm": 2.1126835346221924,
1749
+ "learning_rate": 2.5384615384615385e-06,
1750
+ "loss": 0.7003,
1751
+ "step": 2270
1752
+ },
1753
+ {
1754
+ "epoch": 17.53846153846154,
1755
+ "grad_norm": 3.5276880264282227,
1756
+ "learning_rate": 2.461538461538462e-06,
1757
+ "loss": 0.8305,
1758
+ "step": 2280
1759
+ },
1760
+ {
1761
+ "epoch": 17.615384615384617,
1762
+ "grad_norm": 2.3967173099517822,
1763
+ "learning_rate": 2.384615384615385e-06,
1764
+ "loss": 0.7627,
1765
+ "step": 2290
1766
+ },
1767
+ {
1768
+ "epoch": 17.692307692307693,
1769
+ "grad_norm": 4.473978042602539,
1770
+ "learning_rate": 2.307692307692308e-06,
1771
+ "loss": 0.7332,
1772
+ "step": 2300
1773
+ },
1774
+ {
1775
+ "epoch": 17.76923076923077,
1776
+ "grad_norm": 2.1642568111419678,
1777
+ "learning_rate": 2.230769230769231e-06,
1778
+ "loss": 0.7678,
1779
+ "step": 2310
1780
+ },
1781
+ {
1782
+ "epoch": 17.846153846153847,
1783
+ "grad_norm": 3.03192138671875,
1784
+ "learning_rate": 2.153846153846154e-06,
1785
+ "loss": 0.7565,
1786
+ "step": 2320
1787
+ },
1788
+ {
1789
+ "epoch": 17.923076923076923,
1790
+ "grad_norm": 2.9610419273376465,
1791
+ "learning_rate": 2.0769230769230773e-06,
1792
+ "loss": 0.7651,
1793
+ "step": 2330
1794
+ },
1795
+ {
1796
+ "epoch": 18.0,
1797
+ "grad_norm": 4.160178184509277,
1798
+ "learning_rate": 2.0000000000000003e-06,
1799
+ "loss": 0.7778,
1800
+ "step": 2340
1801
+ },
1802
+ {
1803
+ "epoch": 18.0,
1804
+ "eval_accuracy": 0.8270676691729323,
1805
+ "eval_loss": 0.6930322647094727,
1806
+ "eval_runtime": 0.7854,
1807
+ "eval_samples_per_second": 169.332,
1808
+ "eval_steps_per_second": 21.644,
1809
+ "step": 2340
1810
+ },
1811
+ {
1812
+ "epoch": 18.076923076923077,
1813
+ "grad_norm": 2.168921947479248,
1814
+ "learning_rate": 1.9230769230769234e-06,
1815
+ "loss": 0.7234,
1816
+ "step": 2350
1817
+ },
1818
+ {
1819
+ "epoch": 18.153846153846153,
1820
+ "grad_norm": 3.935608386993408,
1821
+ "learning_rate": 1.8461538461538465e-06,
1822
+ "loss": 0.8192,
1823
+ "step": 2360
1824
+ },
1825
+ {
1826
+ "epoch": 18.23076923076923,
1827
+ "grad_norm": 1.8215328454971313,
1828
+ "learning_rate": 1.7692307692307695e-06,
1829
+ "loss": 0.7271,
1830
+ "step": 2370
1831
+ },
1832
+ {
1833
+ "epoch": 18.307692307692307,
1834
+ "grad_norm": 2.687016010284424,
1835
+ "learning_rate": 1.6923076923076926e-06,
1836
+ "loss": 0.8063,
1837
+ "step": 2380
1838
+ },
1839
+ {
1840
+ "epoch": 18.384615384615383,
1841
+ "grad_norm": 2.3364577293395996,
1842
+ "learning_rate": 1.6153846153846157e-06,
1843
+ "loss": 0.7699,
1844
+ "step": 2390
1845
+ },
1846
+ {
1847
+ "epoch": 18.46153846153846,
1848
+ "grad_norm": 2.7465319633483887,
1849
+ "learning_rate": 1.5384615384615387e-06,
1850
+ "loss": 0.8214,
1851
+ "step": 2400
1852
+ },
1853
+ {
1854
+ "epoch": 18.53846153846154,
1855
+ "grad_norm": 3.3499436378479004,
1856
+ "learning_rate": 1.4615384615384618e-06,
1857
+ "loss": 0.7432,
1858
+ "step": 2410
1859
+ },
1860
+ {
1861
+ "epoch": 18.615384615384617,
1862
+ "grad_norm": 3.7266149520874023,
1863
+ "learning_rate": 1.3846153846153848e-06,
1864
+ "loss": 0.797,
1865
+ "step": 2420
1866
+ },
1867
+ {
1868
+ "epoch": 18.692307692307693,
1869
+ "grad_norm": 2.661741256713867,
1870
+ "learning_rate": 1.307692307692308e-06,
1871
+ "loss": 0.7404,
1872
+ "step": 2430
1873
+ },
1874
+ {
1875
+ "epoch": 18.76923076923077,
1876
+ "grad_norm": 3.166747808456421,
1877
+ "learning_rate": 1.230769230769231e-06,
1878
+ "loss": 0.8197,
1879
+ "step": 2440
1880
+ },
1881
+ {
1882
+ "epoch": 18.846153846153847,
1883
+ "grad_norm": 3.200448989868164,
1884
+ "learning_rate": 1.153846153846154e-06,
1885
+ "loss": 0.8068,
1886
+ "step": 2450
1887
+ },
1888
+ {
1889
+ "epoch": 18.923076923076923,
1890
+ "grad_norm": 2.4404191970825195,
1891
+ "learning_rate": 1.076923076923077e-06,
1892
+ "loss": 0.788,
1893
+ "step": 2460
1894
+ },
1895
+ {
1896
+ "epoch": 19.0,
1897
+ "grad_norm": 3.8639049530029297,
1898
+ "learning_rate": 1.0000000000000002e-06,
1899
+ "loss": 0.8081,
1900
+ "step": 2470
1901
+ },
1902
+ {
1903
+ "epoch": 19.0,
1904
+ "eval_accuracy": 0.8646616541353384,
1905
+ "eval_loss": 0.6890266537666321,
1906
+ "eval_runtime": 0.7797,
1907
+ "eval_samples_per_second": 170.576,
1908
+ "eval_steps_per_second": 21.803,
1909
+ "step": 2470
1910
+ },
1911
+ {
1912
+ "epoch": 19.076923076923077,
1913
+ "grad_norm": 1.7245137691497803,
1914
+ "learning_rate": 9.230769230769232e-07,
1915
+ "loss": 0.7929,
1916
+ "step": 2480
1917
+ },
1918
+ {
1919
+ "epoch": 19.153846153846153,
1920
+ "grad_norm": 3.7959182262420654,
1921
+ "learning_rate": 8.461538461538463e-07,
1922
+ "loss": 0.7397,
1923
+ "step": 2490
1924
+ },
1925
+ {
1926
+ "epoch": 19.23076923076923,
1927
+ "grad_norm": 2.798788070678711,
1928
+ "learning_rate": 7.692307692307694e-07,
1929
+ "loss": 0.7928,
1930
+ "step": 2500
1931
+ },
1932
+ {
1933
+ "epoch": 19.307692307692307,
1934
+ "grad_norm": 2.1275336742401123,
1935
+ "learning_rate": 6.923076923076924e-07,
1936
+ "loss": 0.7672,
1937
+ "step": 2510
1938
+ },
1939
+ {
1940
+ "epoch": 19.384615384615383,
1941
+ "grad_norm": 2.9216866493225098,
1942
+ "learning_rate": 6.153846153846155e-07,
1943
+ "loss": 0.7918,
1944
+ "step": 2520
1945
+ },
1946
+ {
1947
+ "epoch": 19.46153846153846,
1948
+ "grad_norm": 2.3012797832489014,
1949
+ "learning_rate": 5.384615384615386e-07,
1950
+ "loss": 0.7418,
1951
+ "step": 2530
1952
+ },
1953
+ {
1954
+ "epoch": 19.53846153846154,
1955
+ "grad_norm": 2.5353312492370605,
1956
+ "learning_rate": 4.615384615384616e-07,
1957
+ "loss": 0.8115,
1958
+ "step": 2540
1959
+ },
1960
+ {
1961
+ "epoch": 19.615384615384617,
1962
+ "grad_norm": 3.469372510910034,
1963
+ "learning_rate": 3.846153846153847e-07,
1964
+ "loss": 0.7698,
1965
+ "step": 2550
1966
+ },
1967
+ {
1968
+ "epoch": 19.692307692307693,
1969
+ "grad_norm": 2.3621013164520264,
1970
+ "learning_rate": 3.0769230769230774e-07,
1971
+ "loss": 0.6997,
1972
+ "step": 2560
1973
+ },
1974
+ {
1975
+ "epoch": 19.76923076923077,
1976
+ "grad_norm": 1.7231149673461914,
1977
+ "learning_rate": 2.307692307692308e-07,
1978
+ "loss": 0.7207,
1979
+ "step": 2570
1980
+ },
1981
+ {
1982
+ "epoch": 19.846153846153847,
1983
+ "grad_norm": 5.3792924880981445,
1984
+ "learning_rate": 1.5384615384615387e-07,
1985
+ "loss": 0.7656,
1986
+ "step": 2580
1987
+ },
1988
+ {
1989
+ "epoch": 19.923076923076923,
1990
+ "grad_norm": 1.9618691205978394,
1991
+ "learning_rate": 7.692307692307694e-08,
1992
+ "loss": 0.6919,
1993
+ "step": 2590
1994
+ },
1995
+ {
1996
+ "epoch": 20.0,
1997
+ "grad_norm": 4.051193714141846,
1998
+ "learning_rate": 0.0,
1999
+ "loss": 0.7916,
2000
+ "step": 2600
2001
+ },
2002
+ {
2003
+ "epoch": 20.0,
2004
+ "eval_accuracy": 0.8646616541353384,
2005
+ "eval_loss": 0.6874601244926453,
2006
+ "eval_runtime": 0.8096,
2007
+ "eval_samples_per_second": 164.271,
2008
+ "eval_steps_per_second": 20.997,
2009
+ "step": 2600
2010
+ },
2011
+ {
2012
+ "epoch": 20.0,
2013
+ "step": 2600,
2014
+ "total_flos": 2.0877820672794624e+17,
2015
+ "train_loss": 0.19350949709232038,
2016
+ "train_runtime": 49.806,
2017
+ "train_samples_per_second": 415.211,
2018
+ "train_steps_per_second": 52.203
2019
  }
2020
  ],
2021
  "logging_steps": 10,
2022
+ "max_steps": 2600,
2023
  "num_input_tokens_seen": 0,
2024
+ "num_train_epochs": 20,
2025
  "save_steps": 500,
2026
  "stateful_callbacks": {
2027
  "TrainerControl": {
 
2035
  "attributes": {}
2036
  }
2037
  },
2038
+ "total_flos": 2.0877820672794624e+17,
2039
  "train_batch_size": 8,
2040
  "trial_name": null,
2041
  "trial_params": null