Training in progress, step 390000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fee40aa1a6fbacfee552fffa429a1bb6a6bbe0587a8c00ceeeb72d56fcc5491b
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b03a99e1730ed0b4ad5c89ac4a80049122fe9a7b433c61647b26278b007fbb43
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba2482f8cf2440cec0254d5988cce37640442e59f079fb17bd6bcd415e99bcb1
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d70e0b0c79d2fa6d6a28c01c5bc090995fd291533a40cd841db17d67210ae78a
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a3fc66b05c070cd63ffd00ff8eb77c5a4d2302ca4f6995b3280fe94d5663ac9
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bacc9e2fc4f21f22254517223c74cdcb7c52a6a0ffca9793eaf062a0287de1d
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:684e728a687523626e6715432733611e4b4256378736ca4bcaa27753a07851d5
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e66e0b2e2c0276f2bbbbb229f2d3a165b984c5fe5faf07983c7e165b1a34186
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4626,6 +4626,66 @@
|
|
4626 |
"learning_rate": 2.753587746225503e-05,
|
4627 |
"loss": 0.3208,
|
4628 |
"step": 385000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4629 |
}
|
4630 |
],
|
4631 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.6382924400643395,
|
5 |
+
"global_step": 390000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4626 |
"learning_rate": 2.753587746225503e-05,
|
4627 |
"loss": 0.3208,
|
4628 |
"step": 385000
|
4629 |
+
},
|
4630 |
+
{
|
4631 |
+
"epoch": 6.56,
|
4632 |
+
"learning_rate": 2.7390551122062965e-05,
|
4633 |
+
"loss": 0.3208,
|
4634 |
+
"step": 385500
|
4635 |
+
},
|
4636 |
+
{
|
4637 |
+
"epoch": 6.57,
|
4638 |
+
"learning_rate": 2.7245744015892008e-05,
|
4639 |
+
"loss": 0.3206,
|
4640 |
+
"step": 386000
|
4641 |
+
},
|
4642 |
+
{
|
4643 |
+
"epoch": 6.58,
|
4644 |
+
"learning_rate": 2.7101457572929877e-05,
|
4645 |
+
"loss": 0.3206,
|
4646 |
+
"step": 386500
|
4647 |
+
},
|
4648 |
+
{
|
4649 |
+
"epoch": 6.59,
|
4650 |
+
"learning_rate": 2.6957693217225504e-05,
|
4651 |
+
"loss": 0.3207,
|
4652 |
+
"step": 387000
|
4653 |
+
},
|
4654 |
+
{
|
4655 |
+
"epoch": 6.6,
|
4656 |
+
"learning_rate": 2.6814452367675008e-05,
|
4657 |
+
"loss": 0.3205,
|
4658 |
+
"step": 387500
|
4659 |
+
},
|
4660 |
+
{
|
4661 |
+
"epoch": 6.6,
|
4662 |
+
"learning_rate": 2.6671736438007784e-05,
|
4663 |
+
"loss": 0.3206,
|
4664 |
+
"step": 388000
|
4665 |
+
},
|
4666 |
+
{
|
4667 |
+
"epoch": 6.61,
|
4668 |
+
"learning_rate": 2.6529830689765492e-05,
|
4669 |
+
"loss": 0.3206,
|
4670 |
+
"step": 388500
|
4671 |
+
},
|
4672 |
+
{
|
4673 |
+
"epoch": 6.62,
|
4674 |
+
"learning_rate": 2.6388167763455287e-05,
|
4675 |
+
"loss": 0.3204,
|
4676 |
+
"step": 389000
|
4677 |
+
},
|
4678 |
+
{
|
4679 |
+
"epoch": 6.63,
|
4680 |
+
"learning_rate": 2.62470339642853e-05,
|
4681 |
+
"loss": 0.3204,
|
4682 |
+
"step": 389500
|
4683 |
+
},
|
4684 |
+
{
|
4685 |
+
"epoch": 6.64,
|
4686 |
+
"learning_rate": 2.6106711361365064e-05,
|
4687 |
+
"loss": 0.3206,
|
4688 |
+
"step": 390000
|
4689 |
}
|
4690 |
],
|
4691 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b03a99e1730ed0b4ad5c89ac4a80049122fe9a7b433c61647b26278b007fbb43
|
3 |
size 201355195
|