Training in progress, step 465000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae04b2af6665d3abd0a5d470b2bf7284897a4bb07c27eee0b6e1ba438decaa9e
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:644ad84ef5090cb35e5912b8047809eda9520df938e75c6201875a5342c64fd2
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51c7096ef367fe08d551162878490b72e213ab4074868ce645c4d56dbd28b7f8
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6397a75202f41185a5c7f676904b0cdcfc1129c7b082fd80a7c71fec6249b58
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbddd6c4b5409d3e334436c44017507eac79522d187987e8599c405f1a9e9544
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5913a0c5a22066143a6562114115973db2da2530c508c25fc07b079b06992170
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97d81d8dbecd2ea9e42e8620e48d4dcdb6bff10d69a3ce07912303dc15dd9ce5
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:762160864ae4cc7ba8ec08f333d74f30ba91d560ae0a08efd676fd1a9052fbd3
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5526,6 +5526,66 @@
|
|
5526 |
"learning_rate": 1.2218580821636874e-05,
|
5527 |
"loss": 0.319,
|
5528 |
"step": 460000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5529 |
}
|
5530 |
],
|
5531 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.914885830758887,
|
5 |
+
"global_step": 465000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5526 |
"learning_rate": 1.2218580821636874e-05,
|
5527 |
"loss": 0.319,
|
5528 |
"step": 460000
|
5529 |
+
},
|
5530 |
+
{
|
5531 |
+
"epoch": 7.84,
|
5532 |
+
"learning_rate": 1.2163988785682091e-05,
|
5533 |
+
"loss": 0.3187,
|
5534 |
+
"step": 460500
|
5535 |
+
},
|
5536 |
+
{
|
5537 |
+
"epoch": 7.85,
|
5538 |
+
"learning_rate": 1.2110066263771492e-05,
|
5539 |
+
"loss": 0.3188,
|
5540 |
+
"step": 461000
|
5541 |
+
},
|
5542 |
+
{
|
5543 |
+
"epoch": 7.86,
|
5544 |
+
"learning_rate": 1.2056813788098594e-05,
|
5545 |
+
"loss": 0.3188,
|
5546 |
+
"step": 461500
|
5547 |
+
},
|
5548 |
+
{
|
5549 |
+
"epoch": 7.86,
|
5550 |
+
"learning_rate": 1.2004231884243836e-05,
|
5551 |
+
"loss": 0.3188,
|
5552 |
+
"step": 462000
|
5553 |
+
},
|
5554 |
+
{
|
5555 |
+
"epoch": 7.87,
|
5556 |
+
"learning_rate": 1.195242422270528e-05,
|
5557 |
+
"loss": 0.3186,
|
5558 |
+
"step": 462500
|
5559 |
+
},
|
5560 |
+
{
|
5561 |
+
"epoch": 7.88,
|
5562 |
+
"learning_rate": 1.1901285479547943e-05,
|
5563 |
+
"loss": 0.3187,
|
5564 |
+
"step": 463000
|
5565 |
+
},
|
5566 |
+
{
|
5567 |
+
"epoch": 7.89,
|
5568 |
+
"learning_rate": 1.1850715688988035e-05,
|
5569 |
+
"loss": 0.3187,
|
5570 |
+
"step": 463500
|
5571 |
+
},
|
5572 |
+
{
|
5573 |
+
"epoch": 7.9,
|
5574 |
+
"learning_rate": 1.1800818504351289e-05,
|
5575 |
+
"loss": 0.3186,
|
5576 |
+
"step": 464000
|
5577 |
+
},
|
5578 |
+
{
|
5579 |
+
"epoch": 7.91,
|
5580 |
+
"learning_rate": 1.175159441810279e-05,
|
5581 |
+
"loss": 0.3187,
|
5582 |
+
"step": 464500
|
5583 |
+
},
|
5584 |
+
{
|
5585 |
+
"epoch": 7.91,
|
5586 |
+
"learning_rate": 1.1703043916064376e-05,
|
5587 |
+
"loss": 0.3188,
|
5588 |
+
"step": 465000
|
5589 |
}
|
5590 |
],
|
5591 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:644ad84ef5090cb35e5912b8047809eda9520df938e75c6201875a5342c64fd2
|
3 |
size 201355195
|