Training in progress, step 475000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f630ea11527d197e0051d630a3c7684e04ac097735851914a04564cfa290662a
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55a346d3f07ebfee804ae0c4f2dae5d72f27c7b679ca8fdd943dd3fd17b9d683
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52127cd3b69efa84d95e8d727cc591494451ce5ed8828e23e997b43eb36eecb2
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dda55b720bc9bae3b0bf6e8c7ffae8f1314b9ba4095ff7cbc54bd1dfc75c8cc7
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87be04b3b35410443ebe6570babc429a6b6a3f95c5b0f38bfd1c9c5398abfe0a
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9f467d9f8afaebc6e44d3fee8e452fbc9bcf86ce65ff48be9ae812a59e27b2c
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:911e6646c31dedcf9cf3679206057244279210182cc9bc754d7e887f4554a86b
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8192356049e5a5cfc33bc765cffbe4a97cbddd7e409ef12d0abda813cadbf6d0
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5646,6 +5646,66 @@
|
|
5646 |
"learning_rate": 1.1254771050561826e-05,
|
5647 |
"loss": 0.3186,
|
5648 |
"step": 470000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5649 |
}
|
5650 |
],
|
5651 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.085105658675245,
|
5 |
+
"global_step": 475000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5646 |
"learning_rate": 1.1254771050561826e-05,
|
5647 |
"loss": 0.3186,
|
5648 |
"step": 470000
|
5649 |
+
},
|
5650 |
+
{
|
5651 |
+
"epoch": 8.01,
|
5652 |
+
"learning_rate": 1.1213740324212508e-05,
|
5653 |
+
"loss": 0.3186,
|
5654 |
+
"step": 470500
|
5655 |
+
},
|
5656 |
+
{
|
5657 |
+
"epoch": 8.02,
|
5658 |
+
"learning_rate": 1.117330558594806e-05,
|
5659 |
+
"loss": 0.3189,
|
5660 |
+
"step": 471000
|
5661 |
+
},
|
5662 |
+
{
|
5663 |
+
"epoch": 8.03,
|
5664 |
+
"learning_rate": 1.1133550139371026e-05,
|
5665 |
+
"loss": 0.3185,
|
5666 |
+
"step": 471500
|
5667 |
+
},
|
5668 |
+
{
|
5669 |
+
"epoch": 8.03,
|
5670 |
+
"learning_rate": 1.1094474376851596e-05,
|
5671 |
+
"loss": 0.3187,
|
5672 |
+
"step": 472000
|
5673 |
+
},
|
5674 |
+
{
|
5675 |
+
"epoch": 8.04,
|
5676 |
+
"learning_rate": 1.1056154796474798e-05,
|
5677 |
+
"loss": 0.3186,
|
5678 |
+
"step": 472500
|
5679 |
+
},
|
5680 |
+
{
|
5681 |
+
"epoch": 8.05,
|
5682 |
+
"learning_rate": 1.1018438191073521e-05,
|
5683 |
+
"loss": 0.3187,
|
5684 |
+
"step": 473000
|
5685 |
+
},
|
5686 |
+
{
|
5687 |
+
"epoch": 8.06,
|
5688 |
+
"learning_rate": 1.0981475797706187e-05,
|
5689 |
+
"loss": 0.3186,
|
5690 |
+
"step": 473500
|
5691 |
+
},
|
5692 |
+
{
|
5693 |
+
"epoch": 8.07,
|
5694 |
+
"learning_rate": 1.0945191867295903e-05,
|
5695 |
+
"loss": 0.3186,
|
5696 |
+
"step": 474000
|
5697 |
+
},
|
5698 |
+
{
|
5699 |
+
"epoch": 8.08,
|
5700 |
+
"learning_rate": 1.0909516085370731e-05,
|
5701 |
+
"loss": 0.3186,
|
5702 |
+
"step": 474500
|
5703 |
+
},
|
5704 |
+
{
|
5705 |
+
"epoch": 8.09,
|
5706 |
+
"learning_rate": 1.0874522198628832e-05,
|
5707 |
+
"loss": 0.3185,
|
5708 |
+
"step": 475000
|
5709 |
}
|
5710 |
],
|
5711 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55a346d3f07ebfee804ae0c4f2dae5d72f27c7b679ca8fdd943dd3fd17b9d683
|
3 |
size 201355195
|