Training in progress, step 280000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88919edc5cc9978bcacef57b4d166e6499ad44e7dc4e53de13f434f06da69bf3
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02e1e464ec648e1c27493b3bd8c73cd61efe7f80d5890c32e96d2d92c2d0ec34
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a1c61ad55a678b1e53f42eaf630499a66857d7df923cc1018e80a600ac47063
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30317d7b1938bdbcc9a22d97f01bcb942661d60c8a5091072798ab5aeb8c102d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c1f71265ba8cd08cd3b24db9213294ebdb63f997b0b686de8f79962089e55e3
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d7ac8d597b7ec3e4ffbece8dfc2e6bf240d4f9bb2fd8c33c6cf950c39a8bfba
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:150d4722a87d1e0c9417fe2ad5187fcdb99bc1fa8df35b1041b525828c9619c7
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:934fd8b46f09eaf550920a49d68342628d951e52603050827548210fb29af8b7
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 4.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3306,6 +3306,66 @@
|
|
3306 |
"learning_rate": 6.909824385292317e-05,
|
3307 |
"loss": 0.3281,
|
3308 |
"step": 275000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3309 |
}
|
3310 |
],
|
3311 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.765950928077208,
|
5 |
+
"global_step": 280000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3306 |
"learning_rate": 6.909824385292317e-05,
|
3307 |
"loss": 0.3281,
|
3308 |
"step": 275000
|
3309 |
+
},
|
3310 |
+
{
|
3311 |
+
"epoch": 4.69,
|
3312 |
+
"learning_rate": 6.888106984248133e-05,
|
3313 |
+
"loss": 0.3279,
|
3314 |
+
"step": 275500
|
3315 |
+
},
|
3316 |
+
{
|
3317 |
+
"epoch": 4.7,
|
3318 |
+
"learning_rate": 6.866400557139124e-05,
|
3319 |
+
"loss": 0.328,
|
3320 |
+
"step": 276000
|
3321 |
+
},
|
3322 |
+
{
|
3323 |
+
"epoch": 4.71,
|
3324 |
+
"learning_rate": 6.844705318198965e-05,
|
3325 |
+
"loss": 0.3276,
|
3326 |
+
"step": 276500
|
3327 |
+
},
|
3328 |
+
{
|
3329 |
+
"epoch": 4.71,
|
3330 |
+
"learning_rate": 6.823064837702448e-05,
|
3331 |
+
"loss": 0.3279,
|
3332 |
+
"step": 277000
|
3333 |
+
},
|
3334 |
+
{
|
3335 |
+
"epoch": 4.72,
|
3336 |
+
"learning_rate": 6.801392593911085e-05,
|
3337 |
+
"loss": 0.3278,
|
3338 |
+
"step": 277500
|
3339 |
+
},
|
3340 |
+
{
|
3341 |
+
"epoch": 4.73,
|
3342 |
+
"learning_rate": 6.779732179890923e-05,
|
3343 |
+
"loss": 0.3277,
|
3344 |
+
"step": 278000
|
3345 |
+
},
|
3346 |
+
{
|
3347 |
+
"epoch": 4.74,
|
3348 |
+
"learning_rate": 6.758083809421504e-05,
|
3349 |
+
"loss": 0.3277,
|
3350 |
+
"step": 278500
|
3351 |
+
},
|
3352 |
+
{
|
3353 |
+
"epoch": 4.75,
|
3354 |
+
"learning_rate": 6.736447696163502e-05,
|
3355 |
+
"loss": 0.3276,
|
3356 |
+
"step": 279000
|
3357 |
+
},
|
3358 |
+
{
|
3359 |
+
"epoch": 4.76,
|
3360 |
+
"learning_rate": 6.714824053656624e-05,
|
3361 |
+
"loss": 0.3273,
|
3362 |
+
"step": 279500
|
3363 |
+
},
|
3364 |
+
{
|
3365 |
+
"epoch": 4.77,
|
3366 |
+
"learning_rate": 6.693213095317489e-05,
|
3367 |
+
"loss": 0.3274,
|
3368 |
+
"step": 280000
|
3369 |
}
|
3370 |
],
|
3371 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02e1e464ec648e1c27493b3bd8c73cd61efe7f80d5890c32e96d2d92c2d0ec34
|
3 |
size 201355195
|