Training in progress, step 285000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:384293c7b6170abe531d7bdf7476bb62f67ad8ce508056061bc0de67e870104f
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b5a8b986e893a68e13b43ea75fe3e572af751fb6a190938f14988c4ad5ac3c2
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a06a1d9ebb40a393bd637d58f8ac7f856d188c1c5bf11d1566eb610b346ad8db
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37d7cf8cd4c6eef2e6ad10c251ab5f9b0130c611543cc0602e57cfa17d524f95
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8649226118c04ea709506642ca7fa86b6350292287b21608c909d6c416ffbf9
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b28ff5b5c2e38d28ac0e3b1d579a52d003d336b7290b91f14ef1a77b308dc1d
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b5b841b24ad93d751ff4010600ca084716ce2c6f905f00caaa1959fea109ec4
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a03e6f2e407c63ad22a8716076189eef21e436952132cb7536181c961b045df
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 4.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3366,6 +3366,66 @@
|
|
3366 |
"learning_rate": 6.693213095317489e-05,
|
3367 |
"loss": 0.3274,
|
3368 |
"step": 280000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3369 |
}
|
3370 |
],
|
3371 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.8510565867524535,
|
5 |
+
"global_step": 285000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3366 |
"learning_rate": 6.693213095317489e-05,
|
3367 |
"loss": 0.3274,
|
3368 |
"step": 280000
|
3369 |
+
},
|
3370 |
+
{
|
3371 |
+
"epoch": 4.77,
|
3372 |
+
"learning_rate": 6.671615034437533e-05,
|
3373 |
+
"loss": 0.3272,
|
3374 |
+
"step": 280500
|
3375 |
+
},
|
3376 |
+
{
|
3377 |
+
"epoch": 4.78,
|
3378 |
+
"learning_rate": 6.650030084180895e-05,
|
3379 |
+
"loss": 0.3272,
|
3380 |
+
"step": 281000
|
3381 |
+
},
|
3382 |
+
{
|
3383 |
+
"epoch": 4.79,
|
3384 |
+
"learning_rate": 6.628458457582322e-05,
|
3385 |
+
"loss": 0.3273,
|
3386 |
+
"step": 281500
|
3387 |
+
},
|
3388 |
+
{
|
3389 |
+
"epoch": 4.8,
|
3390 |
+
"learning_rate": 6.606900367545062e-05,
|
3391 |
+
"loss": 0.3272,
|
3392 |
+
"step": 282000
|
3393 |
+
},
|
3394 |
+
{
|
3395 |
+
"epoch": 4.81,
|
3396 |
+
"learning_rate": 6.585356026838752e-05,
|
3397 |
+
"loss": 0.327,
|
3398 |
+
"step": 282500
|
3399 |
+
},
|
3400 |
+
{
|
3401 |
+
"epoch": 4.82,
|
3402 |
+
"learning_rate": 6.563825648097345e-05,
|
3403 |
+
"loss": 0.3268,
|
3404 |
+
"step": 283000
|
3405 |
+
},
|
3406 |
+
{
|
3407 |
+
"epoch": 4.83,
|
3408 |
+
"learning_rate": 6.542309443816984e-05,
|
3409 |
+
"loss": 0.3271,
|
3410 |
+
"step": 283500
|
3411 |
+
},
|
3412 |
+
{
|
3413 |
+
"epoch": 4.83,
|
3414 |
+
"learning_rate": 6.520807626353919e-05,
|
3415 |
+
"loss": 0.3268,
|
3416 |
+
"step": 284000
|
3417 |
+
},
|
3418 |
+
{
|
3419 |
+
"epoch": 4.84,
|
3420 |
+
"learning_rate": 6.499363367648454e-05,
|
3421 |
+
"loss": 0.3268,
|
3422 |
+
"step": 284500
|
3423 |
+
},
|
3424 |
+
{
|
3425 |
+
"epoch": 4.85,
|
3426 |
+
"learning_rate": 6.477890930484919e-05,
|
3427 |
+
"loss": 0.3269,
|
3428 |
+
"step": 285000
|
3429 |
}
|
3430 |
],
|
3431 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b5a8b986e893a68e13b43ea75fe3e572af751fb6a190938f14988c4ad5ac3c2
|
3 |
size 201355195
|