Training in progress, step 30000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1115513717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2754a19deafed33f80b8bc9e1d496271fbdefd4dde6d02139929e3ddf01b008f
|
3 |
size 1115513717
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e6f2984904c3cd3157bcf9409a340770e020350dad6bed8f8d65bce2381442b
|
3 |
+
size 15523
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d3cf89aef0bc5afe4fc7de6b797864a345f7acd73a830413d35a9d7579e5dbe
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1672,11 +1672,169 @@
|
|
1672 |
"learning_rate": 0.0,
|
1673 |
"loss": 0.6499,
|
1674 |
"step": 27500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1675 |
}
|
1676 |
],
|
1677 |
"max_steps": 30516,
|
1678 |
"num_train_epochs": 2,
|
1679 |
-
"total_flos": 1.
|
1680 |
"trial_name": null,
|
1681 |
"trial_params": null
|
1682 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.7695983011856309,
|
5 |
+
"global_step": 30000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1672 |
"learning_rate": 0.0,
|
1673 |
"loss": 0.6499,
|
1674 |
"step": 27500
|
1675 |
+
},
|
1676 |
+
{
|
1677 |
+
"epoch": 1.63,
|
1678 |
+
"learning_rate": 0.0,
|
1679 |
+
"loss": 0.6648,
|
1680 |
+
"step": 27600
|
1681 |
+
},
|
1682 |
+
{
|
1683 |
+
"epoch": 1.63,
|
1684 |
+
"learning_rate": 0.0,
|
1685 |
+
"loss": 0.6292,
|
1686 |
+
"step": 27700
|
1687 |
+
},
|
1688 |
+
{
|
1689 |
+
"epoch": 1.64,
|
1690 |
+
"learning_rate": 0.0,
|
1691 |
+
"loss": 0.5542,
|
1692 |
+
"step": 27800
|
1693 |
+
},
|
1694 |
+
{
|
1695 |
+
"epoch": 1.65,
|
1696 |
+
"learning_rate": 0.0,
|
1697 |
+
"loss": 0.5549,
|
1698 |
+
"step": 27900
|
1699 |
+
},
|
1700 |
+
{
|
1701 |
+
"epoch": 1.65,
|
1702 |
+
"learning_rate": 0.0,
|
1703 |
+
"loss": 0.6058,
|
1704 |
+
"step": 28000
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"epoch": 1.66,
|
1708 |
+
"learning_rate": 0.0,
|
1709 |
+
"loss": 0.565,
|
1710 |
+
"step": 28100
|
1711 |
+
},
|
1712 |
+
{
|
1713 |
+
"epoch": 1.66,
|
1714 |
+
"learning_rate": 0.0,
|
1715 |
+
"loss": 0.7065,
|
1716 |
+
"step": 28200
|
1717 |
+
},
|
1718 |
+
{
|
1719 |
+
"epoch": 1.67,
|
1720 |
+
"learning_rate": 0.0,
|
1721 |
+
"loss": 0.6621,
|
1722 |
+
"step": 28300
|
1723 |
+
},
|
1724 |
+
{
|
1725 |
+
"epoch": 1.68,
|
1726 |
+
"learning_rate": 0.0,
|
1727 |
+
"loss": 0.6052,
|
1728 |
+
"step": 28400
|
1729 |
+
},
|
1730 |
+
{
|
1731 |
+
"epoch": 1.68,
|
1732 |
+
"learning_rate": 0.0,
|
1733 |
+
"loss": 0.6506,
|
1734 |
+
"step": 28500
|
1735 |
+
},
|
1736 |
+
{
|
1737 |
+
"epoch": 1.69,
|
1738 |
+
"learning_rate": 0.0,
|
1739 |
+
"loss": 0.7008,
|
1740 |
+
"step": 28600
|
1741 |
+
},
|
1742 |
+
{
|
1743 |
+
"epoch": 1.69,
|
1744 |
+
"learning_rate": 0.0,
|
1745 |
+
"loss": 0.6847,
|
1746 |
+
"step": 28700
|
1747 |
+
},
|
1748 |
+
{
|
1749 |
+
"epoch": 1.7,
|
1750 |
+
"learning_rate": 0.0,
|
1751 |
+
"loss": 0.6591,
|
1752 |
+
"step": 28800
|
1753 |
+
},
|
1754 |
+
{
|
1755 |
+
"epoch": 1.7,
|
1756 |
+
"learning_rate": 0.0,
|
1757 |
+
"loss": 0.6289,
|
1758 |
+
"step": 28900
|
1759 |
+
},
|
1760 |
+
{
|
1761 |
+
"epoch": 1.71,
|
1762 |
+
"learning_rate": 0.0,
|
1763 |
+
"loss": 0.5949,
|
1764 |
+
"step": 29000
|
1765 |
+
},
|
1766 |
+
{
|
1767 |
+
"epoch": 1.72,
|
1768 |
+
"learning_rate": 0.0,
|
1769 |
+
"loss": 0.6497,
|
1770 |
+
"step": 29100
|
1771 |
+
},
|
1772 |
+
{
|
1773 |
+
"epoch": 1.72,
|
1774 |
+
"learning_rate": 0.0,
|
1775 |
+
"loss": 0.5947,
|
1776 |
+
"step": 29200
|
1777 |
+
},
|
1778 |
+
{
|
1779 |
+
"epoch": 1.73,
|
1780 |
+
"learning_rate": 0.0,
|
1781 |
+
"loss": 0.6483,
|
1782 |
+
"step": 29300
|
1783 |
+
},
|
1784 |
+
{
|
1785 |
+
"epoch": 1.73,
|
1786 |
+
"learning_rate": 0.0,
|
1787 |
+
"loss": 0.5743,
|
1788 |
+
"step": 29400
|
1789 |
+
},
|
1790 |
+
{
|
1791 |
+
"epoch": 1.74,
|
1792 |
+
"learning_rate": 0.0,
|
1793 |
+
"loss": 0.6876,
|
1794 |
+
"step": 29500
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 1.75,
|
1798 |
+
"learning_rate": 0.0,
|
1799 |
+
"loss": 0.6503,
|
1800 |
+
"step": 29600
|
1801 |
+
},
|
1802 |
+
{
|
1803 |
+
"epoch": 1.75,
|
1804 |
+
"learning_rate": 0.0,
|
1805 |
+
"loss": 0.5963,
|
1806 |
+
"step": 29700
|
1807 |
+
},
|
1808 |
+
{
|
1809 |
+
"epoch": 1.76,
|
1810 |
+
"learning_rate": 0.0,
|
1811 |
+
"loss": 0.6361,
|
1812 |
+
"step": 29800
|
1813 |
+
},
|
1814 |
+
{
|
1815 |
+
"epoch": 1.76,
|
1816 |
+
"learning_rate": 0.0,
|
1817 |
+
"loss": 0.6624,
|
1818 |
+
"step": 29900
|
1819 |
+
},
|
1820 |
+
{
|
1821 |
+
"epoch": 1.77,
|
1822 |
+
"learning_rate": 0.0,
|
1823 |
+
"loss": 0.6298,
|
1824 |
+
"step": 30000
|
1825 |
+
},
|
1826 |
+
{
|
1827 |
+
"epoch": 1.77,
|
1828 |
+
"eval_loss": 0.8651230335235596,
|
1829 |
+
"eval_runtime": 546.7955,
|
1830 |
+
"eval_samples_per_second": 29.15,
|
1831 |
+
"eval_steps_per_second": 1.823,
|
1832 |
+
"step": 30000
|
1833 |
}
|
1834 |
],
|
1835 |
"max_steps": 30516,
|
1836 |
"num_train_epochs": 2,
|
1837 |
+
"total_flos": 1.8284013786415104e+17,
|
1838 |
"trial_name": null,
|
1839 |
"trial_params": null
|
1840 |
}
|
runs/Dec05_03-30-37_fbdce2302f52/events.out.tfevents.1670211053.fbdce2302f52.24.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89b673d9ccde84e81880a851f0f5abf58604d05b17b5b045206afb9d74ba26fb
|
3 |
+
size 57737
|