Commit
•
70ed69b
1
Parent(s):
399786b
Training in progress, step 165720
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +135 -3
- pytorch_model.bin +1 -1
- runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2401461637
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d74fdb539d44a35348b6810c0d848c4bf56592bd5cf768ed81575f1ebb8bcb14
|
3 |
size 2401461637
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a5ec2ac5d923b5dd8a99ca581137ee6c85b4ea12bee1568d23a8455d2a58084
|
3 |
size 1200739717
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:510fc81178791ae60890b99ad1e29739b0a1957018583076b8ffe9ee3580c1d3
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d8dac73ce104ca44387089f603547c06c75d1a306882323278557a38f3d277f
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1878,11 +1878,143 @@
|
|
1878 |
"learning_rate": 4.677635297967928e-06,
|
1879 |
"loss": 2.7807,
|
1880 |
"step": 154500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1881 |
}
|
1882 |
],
|
1883 |
"max_steps": 201666,
|
1884 |
"num_train_epochs": 3,
|
1885 |
-
"total_flos": 1.
|
1886 |
"trial_name": null,
|
1887 |
"trial_params": null
|
1888 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.4652643479813157,
|
5 |
+
"global_step": 165720,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1878 |
"learning_rate": 4.677635297967928e-06,
|
1879 |
"loss": 2.7807,
|
1880 |
"step": 154500
|
1881 |
+
},
|
1882 |
+
{
|
1883 |
+
"epoch": 2.31,
|
1884 |
+
"learning_rate": 4.628048357184653e-06,
|
1885 |
+
"loss": 2.7652,
|
1886 |
+
"step": 155000
|
1887 |
+
},
|
1888 |
+
{
|
1889 |
+
"epoch": 2.31,
|
1890 |
+
"learning_rate": 4.578461416401377e-06,
|
1891 |
+
"loss": 2.7918,
|
1892 |
+
"step": 155500
|
1893 |
+
},
|
1894 |
+
{
|
1895 |
+
"epoch": 2.32,
|
1896 |
+
"learning_rate": 4.528874475618102e-06,
|
1897 |
+
"loss": 2.7771,
|
1898 |
+
"step": 156000
|
1899 |
+
},
|
1900 |
+
{
|
1901 |
+
"epoch": 2.33,
|
1902 |
+
"learning_rate": 4.479287534834827e-06,
|
1903 |
+
"loss": 2.7561,
|
1904 |
+
"step": 156500
|
1905 |
+
},
|
1906 |
+
{
|
1907 |
+
"epoch": 2.34,
|
1908 |
+
"learning_rate": 4.429700594051551e-06,
|
1909 |
+
"loss": 2.7789,
|
1910 |
+
"step": 157000
|
1911 |
+
},
|
1912 |
+
{
|
1913 |
+
"epoch": 2.34,
|
1914 |
+
"learning_rate": 4.380113653268276e-06,
|
1915 |
+
"loss": 2.7629,
|
1916 |
+
"step": 157500
|
1917 |
+
},
|
1918 |
+
{
|
1919 |
+
"epoch": 2.35,
|
1920 |
+
"learning_rate": 4.330526712485001e-06,
|
1921 |
+
"loss": 2.7672,
|
1922 |
+
"step": 158000
|
1923 |
+
},
|
1924 |
+
{
|
1925 |
+
"epoch": 2.36,
|
1926 |
+
"learning_rate": 4.280939771701725e-06,
|
1927 |
+
"loss": 2.7643,
|
1928 |
+
"step": 158500
|
1929 |
+
},
|
1930 |
+
{
|
1931 |
+
"epoch": 2.37,
|
1932 |
+
"learning_rate": 4.23135283091845e-06,
|
1933 |
+
"loss": 2.7836,
|
1934 |
+
"step": 159000
|
1935 |
+
},
|
1936 |
+
{
|
1937 |
+
"epoch": 2.37,
|
1938 |
+
"learning_rate": 4.181765890135175e-06,
|
1939 |
+
"loss": 2.7742,
|
1940 |
+
"step": 159500
|
1941 |
+
},
|
1942 |
+
{
|
1943 |
+
"epoch": 2.38,
|
1944 |
+
"learning_rate": 4.132178949351899e-06,
|
1945 |
+
"loss": 2.7504,
|
1946 |
+
"step": 160000
|
1947 |
+
},
|
1948 |
+
{
|
1949 |
+
"epoch": 2.39,
|
1950 |
+
"learning_rate": 4.082592008568624e-06,
|
1951 |
+
"loss": 2.7738,
|
1952 |
+
"step": 160500
|
1953 |
+
},
|
1954 |
+
{
|
1955 |
+
"epoch": 2.4,
|
1956 |
+
"learning_rate": 4.033005067785349e-06,
|
1957 |
+
"loss": 2.7741,
|
1958 |
+
"step": 161000
|
1959 |
+
},
|
1960 |
+
{
|
1961 |
+
"epoch": 2.4,
|
1962 |
+
"learning_rate": 3.983418127002074e-06,
|
1963 |
+
"loss": 2.768,
|
1964 |
+
"step": 161500
|
1965 |
+
},
|
1966 |
+
{
|
1967 |
+
"epoch": 2.41,
|
1968 |
+
"learning_rate": 3.933831186218798e-06,
|
1969 |
+
"loss": 2.7874,
|
1970 |
+
"step": 162000
|
1971 |
+
},
|
1972 |
+
{
|
1973 |
+
"epoch": 2.42,
|
1974 |
+
"learning_rate": 3.884244245435523e-06,
|
1975 |
+
"loss": 2.7733,
|
1976 |
+
"step": 162500
|
1977 |
+
},
|
1978 |
+
{
|
1979 |
+
"epoch": 2.42,
|
1980 |
+
"learning_rate": 3.834657304652248e-06,
|
1981 |
+
"loss": 2.7677,
|
1982 |
+
"step": 163000
|
1983 |
+
},
|
1984 |
+
{
|
1985 |
+
"epoch": 2.43,
|
1986 |
+
"learning_rate": 3.7850703638689717e-06,
|
1987 |
+
"loss": 2.7601,
|
1988 |
+
"step": 163500
|
1989 |
+
},
|
1990 |
+
{
|
1991 |
+
"epoch": 2.44,
|
1992 |
+
"learning_rate": 3.735483423085696e-06,
|
1993 |
+
"loss": 2.7832,
|
1994 |
+
"step": 164000
|
1995 |
+
},
|
1996 |
+
{
|
1997 |
+
"epoch": 2.45,
|
1998 |
+
"learning_rate": 3.685896482302421e-06,
|
1999 |
+
"loss": 2.7704,
|
2000 |
+
"step": 164500
|
2001 |
+
},
|
2002 |
+
{
|
2003 |
+
"epoch": 2.45,
|
2004 |
+
"learning_rate": 3.6363095415191457e-06,
|
2005 |
+
"loss": 2.7698,
|
2006 |
+
"step": 165000
|
2007 |
+
},
|
2008 |
+
{
|
2009 |
+
"epoch": 2.46,
|
2010 |
+
"learning_rate": 3.5867226007358706e-06,
|
2011 |
+
"loss": 2.7724,
|
2012 |
+
"step": 165500
|
2013 |
}
|
2014 |
],
|
2015 |
"max_steps": 201666,
|
2016 |
"num_train_epochs": 3,
|
2017 |
+
"total_flos": 1.9488674798095565e+17,
|
2018 |
"trial_name": null,
|
2019 |
"trial_params": null
|
2020 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a5ec2ac5d923b5dd8a99ca581137ee6c85b4ea12bee1568d23a8455d2a58084
|
3 |
size 1200739717
|
runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7dd64fd5f5ee57511fff720cbb56340f4a2adf91133fae41a0df73cab017476
|
3 |
+
size 57631
|