Training in progress, step 480000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:125a953eae139b71c861d769dd5fcb3f2876cb3ba5332474ace67ff7903ad282
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cb09afc7f60da26cfbf13286b33ee5d8eaf949d0691655a730175e631e257c3
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8310a18059ec0119b7d5189ac12986b598d8b8b1ef1bbfc8c8957369e8337ad7
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4a54b152e9b8fc53442ea8e45557e4a5adac5097977f729107da5bb580c1c20
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c3be6e2beda8fd5ff4d10e6a6d31003cc62098e71db295fd431821efbbfb1a9
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1acdc1aa6e4187c8d0aa9e0711043619de98803eb54f6ce34ce5eccae47291d
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d89c33267da2eca03288d19643b70286b13de68f683e137d6b6c77f428e64db6
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3484,11 +3484,85 @@
|
|
3484 |
"eval_samples_per_second": 1116.774,
|
3485 |
"eval_steps_per_second": 17.868,
|
3486 |
"step": 470000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3487 |
}
|
3488 |
],
|
3489 |
"max_steps": 1000000,
|
3490 |
"num_train_epochs": 16,
|
3491 |
-
"total_flos": 3.
|
3492 |
"trial_name": null,
|
3493 |
"trial_params": null
|
3494 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.329699024233817,
|
5 |
+
"global_step": 480000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3484 |
"eval_samples_per_second": 1116.774,
|
3485 |
"eval_steps_per_second": 17.868,
|
3486 |
"step": 470000
|
3487 |
+
},
|
3488 |
+
{
|
3489 |
+
"epoch": 7.19,
|
3490 |
+
"learning_rate": 9.243390115142761e-05,
|
3491 |
+
"loss": 0.2678,
|
3492 |
+
"step": 471000
|
3493 |
+
},
|
3494 |
+
{
|
3495 |
+
"epoch": 7.21,
|
3496 |
+
"learning_rate": 9.220602892445661e-05,
|
3497 |
+
"loss": 0.2678,
|
3498 |
+
"step": 472000
|
3499 |
+
},
|
3500 |
+
{
|
3501 |
+
"epoch": 7.22,
|
3502 |
+
"learning_rate": 9.197802321430889e-05,
|
3503 |
+
"loss": 0.2679,
|
3504 |
+
"step": 473000
|
3505 |
+
},
|
3506 |
+
{
|
3507 |
+
"epoch": 7.24,
|
3508 |
+
"learning_rate": 9.174988651441833e-05,
|
3509 |
+
"loss": 0.2673,
|
3510 |
+
"step": 474000
|
3511 |
+
},
|
3512 |
+
{
|
3513 |
+
"epoch": 7.25,
|
3514 |
+
"learning_rate": 9.152162131965137e-05,
|
3515 |
+
"loss": 0.2675,
|
3516 |
+
"step": 475000
|
3517 |
+
},
|
3518 |
+
{
|
3519 |
+
"epoch": 7.25,
|
3520 |
+
"eval_runtime": 1.0353,
|
3521 |
+
"eval_samples_per_second": 965.922,
|
3522 |
+
"eval_steps_per_second": 15.455,
|
3523 |
+
"step": 475000
|
3524 |
+
},
|
3525 |
+
{
|
3526 |
+
"epoch": 7.27,
|
3527 |
+
"learning_rate": 9.129323012627956e-05,
|
3528 |
+
"loss": 0.2693,
|
3529 |
+
"step": 476000
|
3530 |
+
},
|
3531 |
+
{
|
3532 |
+
"epoch": 7.28,
|
3533 |
+
"learning_rate": 9.106471543195244e-05,
|
3534 |
+
"loss": 0.2675,
|
3535 |
+
"step": 477000
|
3536 |
+
},
|
3537 |
+
{
|
3538 |
+
"epoch": 7.3,
|
3539 |
+
"learning_rate": 9.08360797356701e-05,
|
3540 |
+
"loss": 0.2679,
|
3541 |
+
"step": 478000
|
3542 |
+
},
|
3543 |
+
{
|
3544 |
+
"epoch": 7.31,
|
3545 |
+
"learning_rate": 9.060732553775582e-05,
|
3546 |
+
"loss": 0.2672,
|
3547 |
+
"step": 479000
|
3548 |
+
},
|
3549 |
+
{
|
3550 |
+
"epoch": 7.33,
|
3551 |
+
"learning_rate": 9.037845533982892e-05,
|
3552 |
+
"loss": 0.267,
|
3553 |
+
"step": 480000
|
3554 |
+
},
|
3555 |
+
{
|
3556 |
+
"epoch": 7.33,
|
3557 |
+
"eval_runtime": 1.0347,
|
3558 |
+
"eval_samples_per_second": 966.468,
|
3559 |
+
"eval_steps_per_second": 15.463,
|
3560 |
+
"step": 480000
|
3561 |
}
|
3562 |
],
|
3563 |
"max_steps": 1000000,
|
3564 |
"num_train_epochs": 16,
|
3565 |
+
"total_flos": 3.3648079299796217e+22,
|
3566 |
"trial_name": null,
|
3567 |
"trial_params": null
|
3568 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cb09afc7f60da26cfbf13286b33ee5d8eaf949d0691655a730175e631e257c3
|
3 |
size 449471589
|