Training in progress, step 385000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18de3f7cc4a91ac6868744a9cd20f39edaebacb5665b74cf4f2175021a0edb90
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7132bd452806a1828ae1699a118b9aa68caeae454b47d624252df8772b23b65
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53e62c4c1922f2a1587e8d53c73b96e423282fb6d6259d9ca0f71ac25e5e8153
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10f7532403f5b493736d56d3ea7cc99fbf67e90d193efeb30456688505225f32
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4e2f6272d55d9d66dae051add9b1254b4bef8eb2c18fe3b5027696d129a3821
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28a933ecd96bcb0ef9d59cde72c7a2f327911f311001e588d3f029f5f85e1a29
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0634118a00ed086ccf276fc79772adc912101484932dd3c63dd24fd9eb2718ce
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:468447bff0f0da8cbb0e93720be4efb28f0be877ee6d17ac6e3f1416931c3f00
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4566,6 +4566,66 @@
|
|
4566 |
"learning_rate": 2.9016777909103095e-05,
|
4567 |
"loss": 0.3207,
|
4568 |
"step": 380000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4569 |
}
|
4570 |
],
|
4571 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.553186781389095,
|
5 |
+
"global_step": 385000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4566 |
"learning_rate": 2.9016777909103095e-05,
|
4567 |
"loss": 0.3207,
|
4568 |
"step": 380000
|
4569 |
+
},
|
4570 |
+
{
|
4571 |
+
"epoch": 6.48,
|
4572 |
+
"learning_rate": 2.88663409700897e-05,
|
4573 |
+
"loss": 0.3209,
|
4574 |
+
"step": 380500
|
4575 |
+
},
|
4576 |
+
{
|
4577 |
+
"epoch": 6.49,
|
4578 |
+
"learning_rate": 2.8716408699647456e-05,
|
4579 |
+
"loss": 0.3207,
|
4580 |
+
"step": 381000
|
4581 |
+
},
|
4582 |
+
{
|
4583 |
+
"epoch": 6.49,
|
4584 |
+
"learning_rate": 2.85669825775473e-05,
|
4585 |
+
"loss": 0.3207,
|
4586 |
+
"step": 381500
|
4587 |
+
},
|
4588 |
+
{
|
4589 |
+
"epoch": 6.5,
|
4590 |
+
"learning_rate": 2.8418064078564776e-05,
|
4591 |
+
"loss": 0.3208,
|
4592 |
+
"step": 382000
|
4593 |
+
},
|
4594 |
+
{
|
4595 |
+
"epoch": 6.51,
|
4596 |
+
"learning_rate": 2.8269950982228437e-05,
|
4597 |
+
"loss": 0.3208,
|
4598 |
+
"step": 382500
|
4599 |
+
},
|
4600 |
+
{
|
4601 |
+
"epoch": 6.52,
|
4602 |
+
"learning_rate": 2.8122051111179246e-05,
|
4603 |
+
"loss": 0.3207,
|
4604 |
+
"step": 383000
|
4605 |
+
},
|
4606 |
+
{
|
4607 |
+
"epoch": 6.53,
|
4608 |
+
"learning_rate": 2.7974663254541644e-05,
|
4609 |
+
"loss": 0.3208,
|
4610 |
+
"step": 383500
|
4611 |
+
},
|
4612 |
+
{
|
4613 |
+
"epoch": 6.54,
|
4614 |
+
"learning_rate": 2.782778886697424e-05,
|
4615 |
+
"loss": 0.3208,
|
4616 |
+
"step": 384000
|
4617 |
+
},
|
4618 |
+
{
|
4619 |
+
"epoch": 6.54,
|
4620 |
+
"learning_rate": 2.768172160215594e-05,
|
4621 |
+
"loss": 0.3205,
|
4622 |
+
"step": 384500
|
4623 |
+
},
|
4624 |
+
{
|
4625 |
+
"epoch": 6.55,
|
4626 |
+
"learning_rate": 2.753587746225503e-05,
|
4627 |
+
"loss": 0.3208,
|
4628 |
+
"step": 385000
|
4629 |
}
|
4630 |
],
|
4631 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7132bd452806a1828ae1699a118b9aa68caeae454b47d624252df8772b23b65
|
3 |
size 201355195
|