Training in progress, step 250000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:981f9cd48efb9aa651b5446906cfef65e3f5e49845175ea67dca9f635e9c1038
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22078b7cadf342376e128bb1b061228e01c38cac2488c2ab30ca05231e48ad59
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac406c207b2f7429e3395f91a987edf514c0aeff745dd88acf0897c2d8c0d2f5
|
3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a150bbb8b25e860b9b4e4d20308c75cd4211d4f66e73ca0ae830a1483ae3793d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba99bff5574a5463122f2331dbbdbc3c75f9c55701d70a2ea0f51810fd185527
|
3 |
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b4ebfa40194a9690e4a04456095f6b6e0d44abd914d615940586748348af8ed
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1301777ffe2dc7d45e2808f549b6c6f37d8616c9227a415a7757312ea67a80a1
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1782,11 +1782,85 @@
|
|
1782 |
"eval_samples_per_second": 990.396,
|
1783 |
"eval_steps_per_second": 15.846,
|
1784 |
"step": 240000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1785 |
}
|
1786 |
],
|
1787 |
"max_steps": 1000000,
|
1788 |
"num_train_epochs": 16,
|
1789 |
-
"total_flos": 1.
|
1790 |
"trial_name": null,
|
1791 |
"trial_params": null
|
1792 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.81755157512178,
|
5 |
+
"global_step": 250000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1782 |
"eval_samples_per_second": 990.396,
|
1783 |
"eval_steps_per_second": 15.846,
|
1784 |
"step": 240000
|
1785 |
+
},
|
1786 |
+
{
|
1787 |
+
"epoch": 3.68,
|
1788 |
+
"learning_rate": 0.00013649481627508181,
|
1789 |
+
"loss": 0.3031,
|
1790 |
+
"step": 241000
|
1791 |
+
},
|
1792 |
+
{
|
1793 |
+
"epoch": 3.7,
|
1794 |
+
"learning_rate": 0.0001363578251256578,
|
1795 |
+
"loss": 0.3023,
|
1796 |
+
"step": 242000
|
1797 |
+
},
|
1798 |
+
{
|
1799 |
+
"epoch": 3.71,
|
1800 |
+
"learning_rate": 0.00013622021765608754,
|
1801 |
+
"loss": 0.3022,
|
1802 |
+
"step": 243000
|
1803 |
+
},
|
1804 |
+
{
|
1805 |
+
"epoch": 3.73,
|
1806 |
+
"learning_rate": 0.00013608199537122425,
|
1807 |
+
"loss": 0.3017,
|
1808 |
+
"step": 244000
|
1809 |
+
},
|
1810 |
+
{
|
1811 |
+
"epoch": 3.74,
|
1812 |
+
"learning_rate": 0.0001359431597826447,
|
1813 |
+
"loss": 0.3019,
|
1814 |
+
"step": 245000
|
1815 |
+
},
|
1816 |
+
{
|
1817 |
+
"epoch": 3.74,
|
1818 |
+
"eval_runtime": 1.0744,
|
1819 |
+
"eval_samples_per_second": 930.717,
|
1820 |
+
"eval_steps_per_second": 14.891,
|
1821 |
+
"step": 245000
|
1822 |
+
},
|
1823 |
+
{
|
1824 |
+
"epoch": 3.76,
|
1825 |
+
"learning_rate": 0.0001358037124086327,
|
1826 |
+
"loss": 0.3015,
|
1827 |
+
"step": 246000
|
1828 |
+
},
|
1829 |
+
{
|
1830 |
+
"epoch": 3.77,
|
1831 |
+
"learning_rate": 0.00013566365477416233,
|
1832 |
+
"loss": 0.3018,
|
1833 |
+
"step": 247000
|
1834 |
+
},
|
1835 |
+
{
|
1836 |
+
"epoch": 3.79,
|
1837 |
+
"learning_rate": 0.00013552298841088144,
|
1838 |
+
"loss": 0.3013,
|
1839 |
+
"step": 248000
|
1840 |
+
},
|
1841 |
+
{
|
1842 |
+
"epoch": 3.8,
|
1843 |
+
"learning_rate": 0.00013538171485709486,
|
1844 |
+
"loss": 0.3006,
|
1845 |
+
"step": 249000
|
1846 |
+
},
|
1847 |
+
{
|
1848 |
+
"epoch": 3.82,
|
1849 |
+
"learning_rate": 0.00013523983565774753,
|
1850 |
+
"loss": 0.3008,
|
1851 |
+
"step": 250000
|
1852 |
+
},
|
1853 |
+
{
|
1854 |
+
"epoch": 3.82,
|
1855 |
+
"eval_runtime": 1.0168,
|
1856 |
+
"eval_samples_per_second": 983.434,
|
1857 |
+
"eval_steps_per_second": 15.735,
|
1858 |
+
"step": 250000
|
1859 |
}
|
1860 |
],
|
1861 |
"max_steps": 1000000,
|
1862 |
"num_train_epochs": 16,
|
1863 |
+
"total_flos": 1.7525045545542324e+22,
|
1864 |
"trial_name": null,
|
1865 |
"trial_params": null
|
1866 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22078b7cadf342376e128bb1b061228e01c38cac2488c2ab30ca05231e48ad59
|
3 |
size 449471589
|