Training in progress, step 60000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +51 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/May14_19-37-01_15e842f7c026/1684094526.0884142/events.out.tfevents.1684094526.15e842f7c026.771.1 +3 -0
- runs/May14_19-37-01_15e842f7c026/events.out.tfevents.1684094526.15e842f7c026.771.0 +3 -0
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3871543575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4308792b2a033acdce96ce47703119686ade603de91305623f9a41bb3db8197d
|
3 |
size 3871543575
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24918df0f1f9d1f5aadf7b8e4933628be882e87804ac0114cbca0ad2c83737f7
|
3 |
size 1944201353
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14511
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:836afdbaf7ed0232c311bb2609eff325953c229bbc4edb90bdadf6a7412ce2aa
|
3 |
size 14511
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df056184eca867a0b00a4f1fa76891f3c4f5cedd6a7d840fcd41506dafeb0b63
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ae4577bcc43ac54c56a413372da787f2adba41ace305a09ecc065c5e91b0b80
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -678,11 +678,59 @@
|
|
678 |
"learning_rate": 4.9566902560686915e-06,
|
679 |
"loss": 1.7255,
|
680 |
"step": 56000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
681 |
}
|
682 |
],
|
683 |
"max_steps": 943410,
|
684 |
"num_train_epochs": 10,
|
685 |
-
"total_flos": 3.
|
686 |
"trial_name": null,
|
687 |
"trial_params": null
|
688 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6359907145355678,
|
5 |
+
"global_step": 60000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
678 |
"learning_rate": 4.9566902560686915e-06,
|
679 |
"loss": 1.7255,
|
680 |
"step": 56000
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 0.6,
|
684 |
+
"learning_rate": 4.955915400533522e-06,
|
685 |
+
"loss": 1.7255,
|
686 |
+
"step": 56500
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"epoch": 0.6,
|
690 |
+
"learning_rate": 4.955133736487948e-06,
|
691 |
+
"loss": 1.7173,
|
692 |
+
"step": 57000
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 0.61,
|
696 |
+
"learning_rate": 4.954346849831028e-06,
|
697 |
+
"loss": 1.7042,
|
698 |
+
"step": 57500
|
699 |
+
},
|
700 |
+
{
|
701 |
+
"epoch": 0.61,
|
702 |
+
"learning_rate": 4.953551588890636e-06,
|
703 |
+
"loss": 1.7207,
|
704 |
+
"step": 58000
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 0.62,
|
708 |
+
"learning_rate": 4.952749525993012e-06,
|
709 |
+
"loss": 1.7328,
|
710 |
+
"step": 58500
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 0.63,
|
714 |
+
"learning_rate": 4.9519406633617066e-06,
|
715 |
+
"loss": 1.6936,
|
716 |
+
"step": 59000
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 0.63,
|
720 |
+
"learning_rate": 4.9511266413417645e-06,
|
721 |
+
"loss": 1.7368,
|
722 |
+
"step": 59500
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 0.64,
|
726 |
+
"learning_rate": 4.95030419957734e-06,
|
727 |
+
"loss": 1.7637,
|
728 |
+
"step": 60000
|
729 |
}
|
730 |
],
|
731 |
"max_steps": 943410,
|
732 |
"num_train_epochs": 10,
|
733 |
+
"total_flos": 3.379573384711373e+16,
|
734 |
"trial_name": null,
|
735 |
"trial_params": null
|
736 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a66e548f9c770d8ff9736169550c04df9cfcd91d488d79a7ef713c1f08defe3
|
3 |
size 3771
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24918df0f1f9d1f5aadf7b8e4933628be882e87804ac0114cbca0ad2c83737f7
|
3 |
size 1944201353
|
runs/May14_19-37-01_15e842f7c026/1684094526.0884142/events.out.tfevents.1684094526.15e842f7c026.771.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a3e00413caa297a4a4aa11c2c1592bf4831397edb8e6fc2139aa762e3bd438a
|
3 |
+
size 6187
|
runs/May14_19-37-01_15e842f7c026/events.out.tfevents.1684094526.15e842f7c026.771.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de578695d7b19188bd6bbd53f9a0638874a8dcac4a1771e6bcf927b6589c26eb
|
3 |
+
size 5743
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a66e548f9c770d8ff9736169550c04df9cfcd91d488d79a7ef713c1f08defe3
|
3 |
size 3771
|