Training in progress, step 175000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d331c12222b80b51d79250ea6098c76472b90be4bc5c1b95d65991917017c62e
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:188a8c5d6cc6a340ca48559f2109ceb5056473930bf637b6adf8f022b805db1b
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5da8050f3fec17f0f993422f7c8b9a867de838f9e7f96dff5dc374b416f9e3d4
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffad8df29e4048e4b96e70123f756b3ad525206036229ca856f2bc4d48ba756e
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c09d2e030ed75376e9c598986eb072883a0dd110ee2e3eb3f6f27c0576b91e0b
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2019e6d9402ce5cbaab9baec39e6c5d6ada6535360ac8d7533185c3c59b246f
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a787ade8dc62bf8a75e9b5431f03a8969a01237878239d8a82a05b8998d77c3b
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec0bd4652d15557a0929a867fff0e14635cbe144a105685a7b1df5342b007a93
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2046,6 +2046,66 @@
|
|
2046 |
"learning_rate": 0.00011374934798382673,
|
2047 |
"loss": 0.3396,
|
2048 |
"step": 170000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2049 |
}
|
2050 |
],
|
2051 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.9787150747653213,
|
5 |
+
"global_step": 175000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2046 |
"learning_rate": 0.00011374934798382673,
|
2047 |
"loss": 0.3396,
|
2048 |
"step": 170000
|
2049 |
+
},
|
2050 |
+
{
|
2051 |
+
"epoch": 2.9,
|
2052 |
+
"learning_rate": 0.00011355651779923088,
|
2053 |
+
"loss": 0.3393,
|
2054 |
+
"step": 170500
|
2055 |
+
},
|
2056 |
+
{
|
2057 |
+
"epoch": 2.91,
|
2058 |
+
"learning_rate": 0.00011336335642535168,
|
2059 |
+
"loss": 0.3391,
|
2060 |
+
"step": 171000
|
2061 |
+
},
|
2062 |
+
{
|
2063 |
+
"epoch": 2.92,
|
2064 |
+
"learning_rate": 0.00011316986576861393,
|
2065 |
+
"loss": 0.3394,
|
2066 |
+
"step": 171500
|
2067 |
+
},
|
2068 |
+
{
|
2069 |
+
"epoch": 2.93,
|
2070 |
+
"learning_rate": 0.00011297604773869221,
|
2071 |
+
"loss": 0.3395,
|
2072 |
+
"step": 172000
|
2073 |
+
},
|
2074 |
+
{
|
2075 |
+
"epoch": 2.94,
|
2076 |
+
"learning_rate": 0.00011278229285900902,
|
2077 |
+
"loss": 0.3397,
|
2078 |
+
"step": 172500
|
2079 |
+
},
|
2080 |
+
{
|
2081 |
+
"epoch": 2.94,
|
2082 |
+
"learning_rate": 0.00011258782646982266,
|
2083 |
+
"loss": 0.3396,
|
2084 |
+
"step": 173000
|
2085 |
+
},
|
2086 |
+
{
|
2087 |
+
"epoch": 2.95,
|
2088 |
+
"learning_rate": 0.0001123930384519453,
|
2089 |
+
"loss": 0.3392,
|
2090 |
+
"step": 173500
|
2091 |
+
},
|
2092 |
+
{
|
2093 |
+
"epoch": 2.96,
|
2094 |
+
"learning_rate": 0.00011219793072785603,
|
2095 |
+
"loss": 0.3394,
|
2096 |
+
"step": 174000
|
2097 |
+
},
|
2098 |
+
{
|
2099 |
+
"epoch": 2.97,
|
2100 |
+
"learning_rate": 0.00011200250522318928,
|
2101 |
+
"loss": 0.3391,
|
2102 |
+
"step": 174500
|
2103 |
+
},
|
2104 |
+
{
|
2105 |
+
"epoch": 2.98,
|
2106 |
+
"learning_rate": 0.00011180676386671593,
|
2107 |
+
"loss": 0.3389,
|
2108 |
+
"step": 175000
|
2109 |
}
|
2110 |
],
|
2111 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:188a8c5d6cc6a340ca48559f2109ceb5056473930bf637b6adf8f022b805db1b
|
3 |
size 201355195
|