Training in progress, step 260000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80fb9af3e1214502ea925d3599d3b6efda5d8711659b2c90eec052ea369703f7
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ab3051ab890d6ef671960c94245505fe32b7894ead10b5d7877e9fabce99ffc
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72757db14fb233c9a7d81cef99ff1e84d48a76f7d40a8342399239cffc8c3e9f
|
3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8fbef78331483305594b3f85c6f32ce7bab8b9f165e2fb904f362fb3239105f
|
3 |
+
size 14567
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b31003ab7fcda6d6f6ff162f700b04d49c90c118129057bd1f4ee624c393e588
|
3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1da43c666b0af3cbe77f19371961622ab99b1114e6beec7303d695cd496caee4
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04396ff3c1feb0819b380c26b60d419205385a8e624d7c59a71eaea179611c46
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1856,11 +1856,85 @@
|
|
1856 |
"eval_samples_per_second": 983.434,
|
1857 |
"eval_steps_per_second": 15.735,
|
1858 |
"step": 250000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1859 |
}
|
1860 |
],
|
1861 |
"max_steps": 1000000,
|
1862 |
"num_train_epochs": 16,
|
1863 |
-
"total_flos": 1.
|
1864 |
"trial_name": null,
|
1865 |
"trial_params": null
|
1866 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.970253638126651,
|
5 |
+
"global_step": 260000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1856 |
"eval_samples_per_second": 983.434,
|
1857 |
"eval_steps_per_second": 15.735,
|
1858 |
"step": 250000
|
1859 |
+
},
|
1860 |
+
{
|
1861 |
+
"epoch": 3.83,
|
1862 |
+
"learning_rate": 0.00013509735236440766,
|
1863 |
+
"loss": 0.3003,
|
1864 |
+
"step": 251000
|
1865 |
+
},
|
1866 |
+
{
|
1867 |
+
"epoch": 3.85,
|
1868 |
+
"learning_rate": 0.00013495426653524972,
|
1869 |
+
"loss": 0.3,
|
1870 |
+
"step": 252000
|
1871 |
+
},
|
1872 |
+
{
|
1873 |
+
"epoch": 3.86,
|
1874 |
+
"learning_rate": 0.00013481057973503742,
|
1875 |
+
"loss": 0.3,
|
1876 |
+
"step": 253000
|
1877 |
+
},
|
1878 |
+
{
|
1879 |
+
"epoch": 3.88,
|
1880 |
+
"learning_rate": 0.00013466629353510651,
|
1881 |
+
"loss": 0.2997,
|
1882 |
+
"step": 254000
|
1883 |
+
},
|
1884 |
+
{
|
1885 |
+
"epoch": 3.89,
|
1886 |
+
"learning_rate": 0.00013452140951334787,
|
1887 |
+
"loss": 0.2995,
|
1888 |
+
"step": 255000
|
1889 |
+
},
|
1890 |
+
{
|
1891 |
+
"epoch": 3.89,
|
1892 |
+
"eval_runtime": 0.8192,
|
1893 |
+
"eval_samples_per_second": 1220.744,
|
1894 |
+
"eval_steps_per_second": 19.532,
|
1895 |
+
"step": 255000
|
1896 |
+
},
|
1897 |
+
{
|
1898 |
+
"epoch": 3.91,
|
1899 |
+
"learning_rate": 0.00013437592925418985,
|
1900 |
+
"loss": 0.2996,
|
1901 |
+
"step": 256000
|
1902 |
+
},
|
1903 |
+
{
|
1904 |
+
"epoch": 3.92,
|
1905 |
+
"learning_rate": 0.00013422985434858133,
|
1906 |
+
"loss": 0.299,
|
1907 |
+
"step": 257000
|
1908 |
+
},
|
1909 |
+
{
|
1910 |
+
"epoch": 3.94,
|
1911 |
+
"learning_rate": 0.00013408318639397405,
|
1912 |
+
"loss": 0.2987,
|
1913 |
+
"step": 258000
|
1914 |
+
},
|
1915 |
+
{
|
1916 |
+
"epoch": 3.95,
|
1917 |
+
"learning_rate": 0.00013393592699430525,
|
1918 |
+
"loss": 0.2986,
|
1919 |
+
"step": 259000
|
1920 |
+
},
|
1921 |
+
{
|
1922 |
+
"epoch": 3.97,
|
1923 |
+
"learning_rate": 0.00013378807775998012,
|
1924 |
+
"loss": 0.2984,
|
1925 |
+
"step": 260000
|
1926 |
+
},
|
1927 |
+
{
|
1928 |
+
"epoch": 3.97,
|
1929 |
+
"eval_runtime": 1.0461,
|
1930 |
+
"eval_samples_per_second": 955.963,
|
1931 |
+
"eval_steps_per_second": 15.295,
|
1932 |
+
"step": 260000
|
1933 |
}
|
1934 |
],
|
1935 |
"max_steps": 1000000,
|
1936 |
"num_train_epochs": 16,
|
1937 |
+
"total_flos": 1.8226048155845795e+22,
|
1938 |
"trial_name": null,
|
1939 |
"trial_params": null
|
1940 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ab3051ab890d6ef671960c94245505fe32b7894ead10b5d7877e9fabce99ffc
|
3 |
size 449471589
|