Training in progress, step 630000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d82e2070f1b6c3e7ba3ac4e05d24d0c3c44e21fea62b60f30dbcd4b8aea7947
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2aa845416abb07e55de755c0df750468c782329996a5b84f0336b895c9a2baa1
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4acbb51e13aa598f4ce3e939d773ff547815a4a8c85e2bd41643e066b97a863b
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:798c603bd32aeea05803afc8a5c1b8f53a6193b4dd4a2b5a26aa63e0065ce70e
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0fb0d506dd83556c6bb13f32358c5188773f715722e0fae2529869d20b06680
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa6390e57fdd76fefb80f0c1ccc262a99cb2e588a24afb098b63de6015cd6400
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:445023bf71af0013aa9e5796238ea2c1da2dcfc1e0064784bf2e5fc75bdc80c6
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4594,11 +4594,85 @@
|
|
4594 |
"eval_samples_per_second": 944.48,
|
4595 |
"eval_steps_per_second": 15.112,
|
4596 |
"step": 620000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4597 |
}
|
4598 |
],
|
4599 |
"max_steps": 1000000,
|
4600 |
"num_train_epochs": 16,
|
4601 |
-
"total_flos": 4.
|
4602 |
"trial_name": null,
|
4603 |
"trial_params": null
|
4604 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.620229969306886,
|
5 |
+
"global_step": 630000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4594 |
"eval_samples_per_second": 944.48,
|
4595 |
"eval_steps_per_second": 15.112,
|
4596 |
"step": 620000
|
4597 |
+
},
|
4598 |
+
{
|
4599 |
+
"epoch": 9.48,
|
4600 |
+
"learning_rate": 5.814877301838688e-05,
|
4601 |
+
"loss": 0.2523,
|
4602 |
+
"step": 621000
|
4603 |
+
},
|
4604 |
+
{
|
4605 |
+
"epoch": 9.5,
|
4606 |
+
"learning_rate": 5.7928974604537494e-05,
|
4607 |
+
"loss": 0.2522,
|
4608 |
+
"step": 622000
|
4609 |
+
},
|
4610 |
+
{
|
4611 |
+
"epoch": 9.51,
|
4612 |
+
"learning_rate": 5.770941755588573e-05,
|
4613 |
+
"loss": 0.2537,
|
4614 |
+
"step": 623000
|
4615 |
+
},
|
4616 |
+
{
|
4617 |
+
"epoch": 9.53,
|
4618 |
+
"learning_rate": 5.749010427347233e-05,
|
4619 |
+
"loss": 0.254,
|
4620 |
+
"step": 624000
|
4621 |
+
},
|
4622 |
+
{
|
4623 |
+
"epoch": 9.54,
|
4624 |
+
"learning_rate": 5.7271037155672156e-05,
|
4625 |
+
"loss": 0.2522,
|
4626 |
+
"step": 625000
|
4627 |
+
},
|
4628 |
+
{
|
4629 |
+
"epoch": 9.54,
|
4630 |
+
"eval_runtime": 1.0707,
|
4631 |
+
"eval_samples_per_second": 934.001,
|
4632 |
+
"eval_steps_per_second": 14.944,
|
4633 |
+
"step": 625000
|
4634 |
+
},
|
4635 |
+
{
|
4636 |
+
"epoch": 9.56,
|
4637 |
+
"learning_rate": 5.7052218598168154e-05,
|
4638 |
+
"loss": 0.2524,
|
4639 |
+
"step": 626000
|
4640 |
+
},
|
4641 |
+
{
|
4642 |
+
"epoch": 9.57,
|
4643 |
+
"learning_rate": 5.6833650993925016e-05,
|
4644 |
+
"loss": 0.2522,
|
4645 |
+
"step": 627000
|
4646 |
+
},
|
4647 |
+
{
|
4648 |
+
"epoch": 9.59,
|
4649 |
+
"learning_rate": 5.661533673316303e-05,
|
4650 |
+
"loss": 0.2522,
|
4651 |
+
"step": 628000
|
4652 |
+
},
|
4653 |
+
{
|
4654 |
+
"epoch": 9.6,
|
4655 |
+
"learning_rate": 5.639727820333198e-05,
|
4656 |
+
"loss": 0.2518,
|
4657 |
+
"step": 629000
|
4658 |
+
},
|
4659 |
+
{
|
4660 |
+
"epoch": 9.62,
|
4661 |
+
"learning_rate": 5.617947778908498e-05,
|
4662 |
+
"loss": 0.2517,
|
4663 |
+
"step": 630000
|
4664 |
+
},
|
4665 |
+
{
|
4666 |
+
"epoch": 9.62,
|
4667 |
+
"eval_runtime": 1.1949,
|
4668 |
+
"eval_samples_per_second": 836.899,
|
4669 |
+
"eval_steps_per_second": 13.39,
|
4670 |
+
"step": 630000
|
4671 |
}
|
4672 |
],
|
4673 |
"max_steps": 1000000,
|
4674 |
"num_train_epochs": 16,
|
4675 |
+
"total_flos": 4.416310531298531e+22,
|
4676 |
"trial_name": null,
|
4677 |
"trial_params": null
|
4678 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2aa845416abb07e55de755c0df750468c782329996a5b84f0336b895c9a2baa1
|
3 |
size 449471589
|