stefania-radu
commited on
Commit
•
c8065c4
1
Parent(s):
5791552
Training in progress, step 300000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893441530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e419578f524e1611c3d5902d97ccf8efc92603d3b33f13949516891bb5476e00
|
3 |
size 893441530
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 454197066
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3926e99c54fb7c961027b62d11498ced043c43266a692f441538601c7a5f10f7
|
3 |
size 454197066
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c42cb907bbd858ffd2b5bae4767d672ae3b753bd7ac85cb51990a0992d4df69a
|
3 |
+
size 14308
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08b25c62270eb67709cff9418808f83a4a7710e7ce508a964ce593dde6417e23
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2152,11 +2152,85 @@
|
|
2152 |
"eval_samples_per_second": 103.835,
|
2153 |
"eval_steps_per_second": 12.979,
|
2154 |
"step": 290000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2155 |
}
|
2156 |
],
|
2157 |
"max_steps": 1000000,
|
2158 |
"num_train_epochs": 9223372036854775807,
|
2159 |
-
"total_flos": 2.
|
2160 |
"trial_name": null,
|
2161 |
"trial_params": null
|
2162 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.3,
|
5 |
+
"global_step": 300000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2152 |
"eval_samples_per_second": 103.835,
|
2153 |
"eval_steps_per_second": 12.979,
|
2154 |
"step": 290000
|
2155 |
+
},
|
2156 |
+
{
|
2157 |
+
"epoch": 0.29,
|
2158 |
+
"learning_rate": 1.7432579625275767e-05,
|
2159 |
+
"loss": 0.3557,
|
2160 |
+
"step": 291000
|
2161 |
+
},
|
2162 |
+
{
|
2163 |
+
"epoch": 0.29,
|
2164 |
+
"learning_rate": 1.7422214882158484e-05,
|
2165 |
+
"loss": 0.3569,
|
2166 |
+
"step": 292000
|
2167 |
+
},
|
2168 |
+
{
|
2169 |
+
"epoch": 0.29,
|
2170 |
+
"learning_rate": 1.7411816815187455e-05,
|
2171 |
+
"loss": 0.3564,
|
2172 |
+
"step": 293000
|
2173 |
+
},
|
2174 |
+
{
|
2175 |
+
"epoch": 0.29,
|
2176 |
+
"learning_rate": 1.7401385538074276e-05,
|
2177 |
+
"loss": 0.3574,
|
2178 |
+
"step": 294000
|
2179 |
+
},
|
2180 |
+
{
|
2181 |
+
"epoch": 0.29,
|
2182 |
+
"learning_rate": 1.7390921164893724e-05,
|
2183 |
+
"loss": 0.3573,
|
2184 |
+
"step": 295000
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 0.29,
|
2188 |
+
"eval_runtime": 3244.7523,
|
2189 |
+
"eval_samples_per_second": 104.677,
|
2190 |
+
"eval_steps_per_second": 13.085,
|
2191 |
+
"step": 295000
|
2192 |
+
},
|
2193 |
+
{
|
2194 |
+
"epoch": 0.3,
|
2195 |
+
"learning_rate": 1.7380423810082507e-05,
|
2196 |
+
"loss": 0.353,
|
2197 |
+
"step": 296000
|
2198 |
+
},
|
2199 |
+
{
|
2200 |
+
"epoch": 0.3,
|
2201 |
+
"learning_rate": 1.7369893588438012e-05,
|
2202 |
+
"loss": 0.3568,
|
2203 |
+
"step": 297000
|
2204 |
+
},
|
2205 |
+
{
|
2206 |
+
"epoch": 0.3,
|
2207 |
+
"learning_rate": 1.7359330615117058e-05,
|
2208 |
+
"loss": 0.3537,
|
2209 |
+
"step": 298000
|
2210 |
+
},
|
2211 |
+
{
|
2212 |
+
"epoch": 0.3,
|
2213 |
+
"learning_rate": 1.734873500563463e-05,
|
2214 |
+
"loss": 0.3576,
|
2215 |
+
"step": 299000
|
2216 |
+
},
|
2217 |
+
{
|
2218 |
+
"epoch": 0.3,
|
2219 |
+
"learning_rate": 1.7338106875862617e-05,
|
2220 |
+
"loss": 0.3575,
|
2221 |
+
"step": 300000
|
2222 |
+
},
|
2223 |
+
{
|
2224 |
+
"epoch": 0.3,
|
2225 |
+
"eval_runtime": 3130.8127,
|
2226 |
+
"eval_samples_per_second": 108.487,
|
2227 |
+
"eval_steps_per_second": 13.561,
|
2228 |
+
"step": 300000
|
2229 |
}
|
2230 |
],
|
2231 |
"max_steps": 1000000,
|
2232 |
"num_train_epochs": 9223372036854775807,
|
2233 |
+
"total_flos": 2.656383051969331e+21,
|
2234 |
"trial_name": null,
|
2235 |
"trial_params": null
|
2236 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 454197066
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3926e99c54fb7c961027b62d11498ced043c43266a692f441538601c7a5f10f7
|
3 |
size 454197066
|