Training in progress, step 650000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa92f0e8b9e69e1553d8b13a15bb13ce9949137fccf0723c1cf598ce83f198b0
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b48d3a0bd417c9af2fc7e229c4f39167675dca2415013cbeac1e6dc95824f669
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e055e902e7363a164e2d5682ba553c77cec859581fb13cd45150bf96f1a362c
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:436765cf8b9dcd2a96469489c52342b1fc2a8edf0ab7af7b53c1cbd1ff9932a6
|
3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ee8f18ff63c361ce90d137b232b2607444382342857d71c811d9abe82e89eeb
|
3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:106d03af9d874407e7a0086ddb94edb099a500fa25e66c11a4dedce8d45fc7e2
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b544465929a51046e9a52e629bd463b9098d69ff8cc60ad2e18003214dae8858
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4742,11 +4742,85 @@
|
|
4742 |
"eval_samples_per_second": 993.184,
|
4743 |
"eval_steps_per_second": 15.891,
|
4744 |
"step": 640000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4745 |
}
|
4746 |
],
|
4747 |
"max_steps": 1000000,
|
4748 |
"num_train_epochs": 16,
|
4749 |
-
"total_flos": 4.
|
4750 |
"trial_name": null,
|
4751 |
"trial_params": null
|
4752 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.925634095316628,
|
5 |
+
"global_step": 650000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4742 |
"eval_samples_per_second": 993.184,
|
4743 |
"eval_steps_per_second": 15.891,
|
4744 |
"step": 640000
|
4745 |
+
},
|
4746 |
+
{
|
4747 |
+
"epoch": 9.79,
|
4748 |
+
"learning_rate": 5.3801387994131576e-05,
|
4749 |
+
"loss": 0.2501,
|
4750 |
+
"step": 641000
|
4751 |
+
},
|
4752 |
+
{
|
4753 |
+
"epoch": 9.8,
|
4754 |
+
"learning_rate": 5.358686991636209e-05,
|
4755 |
+
"loss": 0.2503,
|
4756 |
+
"step": 642000
|
4757 |
+
},
|
4758 |
+
{
|
4759 |
+
"epoch": 9.82,
|
4760 |
+
"learning_rate": 5.3372640688351476e-05,
|
4761 |
+
"loss": 0.2505,
|
4762 |
+
"step": 643000
|
4763 |
+
},
|
4764 |
+
{
|
4765 |
+
"epoch": 9.83,
|
4766 |
+
"learning_rate": 5.315870265287618e-05,
|
4767 |
+
"loss": 0.2502,
|
4768 |
+
"step": 644000
|
4769 |
+
},
|
4770 |
+
{
|
4771 |
+
"epoch": 9.85,
|
4772 |
+
"learning_rate": 5.294505814952835e-05,
|
4773 |
+
"loss": 0.2501,
|
4774 |
+
"step": 645000
|
4775 |
+
},
|
4776 |
+
{
|
4777 |
+
"epoch": 9.85,
|
4778 |
+
"eval_runtime": 1.0688,
|
4779 |
+
"eval_samples_per_second": 935.652,
|
4780 |
+
"eval_steps_per_second": 14.97,
|
4781 |
+
"step": 645000
|
4782 |
+
},
|
4783 |
+
{
|
4784 |
+
"epoch": 9.86,
|
4785 |
+
"learning_rate": 5.2731709514689995e-05,
|
4786 |
+
"loss": 0.2502,
|
4787 |
+
"step": 646000
|
4788 |
+
},
|
4789 |
+
{
|
4790 |
+
"epoch": 9.88,
|
4791 |
+
"learning_rate": 5.25186590815076e-05,
|
4792 |
+
"loss": 0.2501,
|
4793 |
+
"step": 647000
|
4794 |
+
},
|
4795 |
+
{
|
4796 |
+
"epoch": 9.9,
|
4797 |
+
"learning_rate": 5.2305909179866635e-05,
|
4798 |
+
"loss": 0.2495,
|
4799 |
+
"step": 648000
|
4800 |
+
},
|
4801 |
+
{
|
4802 |
+
"epoch": 9.91,
|
4803 |
+
"learning_rate": 5.209346213636584e-05,
|
4804 |
+
"loss": 0.2498,
|
4805 |
+
"step": 649000
|
4806 |
+
},
|
4807 |
+
{
|
4808 |
+
"epoch": 9.93,
|
4809 |
+
"learning_rate": 5.188132027429215e-05,
|
4810 |
+
"loss": 0.2495,
|
4811 |
+
"step": 650000
|
4812 |
+
},
|
4813 |
+
{
|
4814 |
+
"epoch": 9.93,
|
4815 |
+
"eval_runtime": 1.0361,
|
4816 |
+
"eval_samples_per_second": 965.164,
|
4817 |
+
"eval_steps_per_second": 15.443,
|
4818 |
+
"step": 650000
|
4819 |
}
|
4820 |
],
|
4821 |
"max_steps": 1000000,
|
4822 |
"num_train_epochs": 16,
|
4823 |
+
"total_flos": 4.556511053359226e+22,
|
4824 |
"trial_name": null,
|
4825 |
"trial_params": null
|
4826 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b48d3a0bd417c9af2fc7e229c4f39167675dca2415013cbeac1e6dc95824f669
|
3 |
size 449471589
|