Training in progress, step 500000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a329db70ea5526ba5e4b910d073864205b498f32a4378b384003610aada51d6
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3a4d73b67517b389449be9b81a6d62f88071ffeb1fb6ad679e4c42b56b14bda
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d512ffa1b18c6ccc8dac4806c6008b76bb8f78c31b60a4336ad6a2a9fa9bb7a3
|
3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f66cebabe552d21647073ceeabf71a9c5fddbc9e0c70066a6914d3a038a79677
|
3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:572120ff5e1d57786cb947f3c52e750254c5a4ff2a5c06ef2608f45e6a4e60de
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed3efb56468deb71f34f2a6667b3f91c0deb4e0556b62db345c7c959c339450d
|
3 |
+
size 14439
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61639df917cea6bc1eea9e7a1f48d3f6c9acb9557d8752aa9847613f1b857ad8
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -3632,11 +3632,85 @@
|
|
3632 |
"eval_samples_per_second": 930.213,
|
3633 |
"eval_steps_per_second": 14.883,
|
3634 |
"step": 490000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3635 |
}
|
3636 |
],
|
3637 |
"max_steps": 1000000,
|
3638 |
"num_train_epochs": 16,
|
3639 |
-
"total_flos": 3.
|
3640 |
"trial_name": null,
|
3641 |
"trial_params": null
|
3642 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.63510315024356,
|
5 |
+
"global_step": 500000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
3632 |
"eval_samples_per_second": 930.213,
|
3633 |
"eval_steps_per_second": 14.883,
|
3634 |
"step": 490000
|
3635 |
+
},
|
3636 |
+
{
|
3637 |
+
"epoch": 7.5,
|
3638 |
+
"learning_rate": 8.78539438398963e-05,
|
3639 |
+
"loss": 0.2655,
|
3640 |
+
"step": 491000
|
3641 |
+
},
|
3642 |
+
{
|
3643 |
+
"epoch": 7.51,
|
3644 |
+
"learning_rate": 8.762387720245008e-05,
|
3645 |
+
"loss": 0.2656,
|
3646 |
+
"step": 492000
|
3647 |
+
},
|
3648 |
+
{
|
3649 |
+
"epoch": 7.53,
|
3650 |
+
"learning_rate": 8.73937271915042e-05,
|
3651 |
+
"loss": 0.2655,
|
3652 |
+
"step": 493000
|
3653 |
+
},
|
3654 |
+
{
|
3655 |
+
"epoch": 7.54,
|
3656 |
+
"learning_rate": 8.716349632394235e-05,
|
3657 |
+
"loss": 0.2652,
|
3658 |
+
"step": 494000
|
3659 |
+
},
|
3660 |
+
{
|
3661 |
+
"epoch": 7.56,
|
3662 |
+
"learning_rate": 8.69331871175324e-05,
|
3663 |
+
"loss": 0.2651,
|
3664 |
+
"step": 495000
|
3665 |
+
},
|
3666 |
+
{
|
3667 |
+
"epoch": 7.56,
|
3668 |
+
"eval_runtime": 1.1978,
|
3669 |
+
"eval_samples_per_second": 834.871,
|
3670 |
+
"eval_steps_per_second": 13.358,
|
3671 |
+
"step": 495000
|
3672 |
+
},
|
3673 |
+
{
|
3674 |
+
"epoch": 7.57,
|
3675 |
+
"learning_rate": 8.67028020908989e-05,
|
3676 |
+
"loss": 0.2647,
|
3677 |
+
"step": 496000
|
3678 |
+
},
|
3679 |
+
{
|
3680 |
+
"epoch": 7.59,
|
3681 |
+
"learning_rate": 8.647234376349565e-05,
|
3682 |
+
"loss": 0.2653,
|
3683 |
+
"step": 497000
|
3684 |
+
},
|
3685 |
+
{
|
3686 |
+
"epoch": 7.6,
|
3687 |
+
"learning_rate": 8.624181465557794e-05,
|
3688 |
+
"loss": 0.2649,
|
3689 |
+
"step": 498000
|
3690 |
+
},
|
3691 |
+
{
|
3692 |
+
"epoch": 7.62,
|
3693 |
+
"learning_rate": 8.601121728817519e-05,
|
3694 |
+
"loss": 0.2647,
|
3695 |
+
"step": 499000
|
3696 |
+
},
|
3697 |
+
{
|
3698 |
+
"epoch": 7.64,
|
3699 |
+
"learning_rate": 8.578055418306327e-05,
|
3700 |
+
"loss": 0.2654,
|
3701 |
+
"step": 500000
|
3702 |
+
},
|
3703 |
+
{
|
3704 |
+
"epoch": 7.64,
|
3705 |
+
"eval_runtime": 1.1022,
|
3706 |
+
"eval_samples_per_second": 907.298,
|
3707 |
+
"eval_steps_per_second": 14.517,
|
3708 |
+
"step": 500000
|
3709 |
}
|
3710 |
],
|
3711 |
"max_steps": 1000000,
|
3712 |
"num_train_epochs": 16,
|
3713 |
+
"total_flos": 3.505008452040316e+22,
|
3714 |
"trial_name": null,
|
3715 |
"trial_params": null
|
3716 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3a4d73b67517b389449be9b81a6d62f88071ffeb1fb6ad679e4c42b56b14bda
|
3 |
size 449471589
|