Training in progress, step 485000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d893568f1b98d344be31605b8fae96006b46a052189571c91e8e7ad3d7d78aa0
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ccac6cfd193e438464e5b6e186f7cfb7a9fd45fac4373893748d8948cac56c4
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:011ebe7c11d42cd0a1c9b353a4965be6c7e082f73fc7473a2a615e866449a3b0
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:637dc4bb1fca17ae7fe8b636a8730cdbeb63210200e935b533f565d1c0df8760
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f17a75302e0f04f282b97e3b350868a9f3d9c32fda1e8e8967e950ebb8a6f6fa
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b61883c7448b2ee328c7d8245911d6c0b73c91036e9453c3237361bcca67410c
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9adb8b1f94ae4dfb98288f1d9b1d42fa2616a61139269170f13efbd2d3c52aac
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18d51d5350efcb705648a3973831fb0ad4e4c422f14bb0cb6af95016a44e9103
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5766,6 +5766,66 @@
|
|
5766 |
"learning_rate": 1.0562215843511462e-05,
|
5767 |
"loss": 0.3186,
|
5768 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5769 |
}
|
5770 |
],
|
5771 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.255316976025735,
|
5 |
+
"global_step": 485000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5766 |
"learning_rate": 1.0562215843511462e-05,
|
5767 |
"loss": 0.3186,
|
5768 |
"step": 480000
|
5769 |
+
},
|
5770 |
+
{
|
5771 |
+
"epoch": 8.18,
|
5772 |
+
"learning_rate": 1.0534742744692915e-05,
|
5773 |
+
"loss": 0.3186,
|
5774 |
+
"step": 480500
|
5775 |
+
},
|
5776 |
+
{
|
5777 |
+
"epoch": 8.19,
|
5778 |
+
"learning_rate": 1.0507955239919215e-05,
|
5779 |
+
"loss": 0.3185,
|
5780 |
+
"step": 481000
|
5781 |
+
},
|
5782 |
+
{
|
5783 |
+
"epoch": 8.2,
|
5784 |
+
"learning_rate": 1.0481853593572226e-05,
|
5785 |
+
"loss": 0.3184,
|
5786 |
+
"step": 481500
|
5787 |
+
},
|
5788 |
+
{
|
5789 |
+
"epoch": 8.2,
|
5790 |
+
"learning_rate": 1.0456488209413605e-05,
|
5791 |
+
"loss": 0.3185,
|
5792 |
+
"step": 482000
|
5793 |
+
},
|
5794 |
+
{
|
5795 |
+
"epoch": 8.21,
|
5796 |
+
"learning_rate": 1.0431757673006884e-05,
|
5797 |
+
"loss": 0.3184,
|
5798 |
+
"step": 482500
|
5799 |
+
},
|
5800 |
+
{
|
5801 |
+
"epoch": 8.22,
|
5802 |
+
"learning_rate": 1.0407713747066088e-05,
|
5803 |
+
"loss": 0.3184,
|
5804 |
+
"step": 483000
|
5805 |
+
},
|
5806 |
+
{
|
5807 |
+
"epoch": 8.23,
|
5808 |
+
"learning_rate": 1.038435666889507e-05,
|
5809 |
+
"loss": 0.3186,
|
5810 |
+
"step": 483500
|
5811 |
+
},
|
5812 |
+
{
|
5813 |
+
"epoch": 8.24,
|
5814 |
+
"learning_rate": 1.036168666901875e-05,
|
5815 |
+
"loss": 0.3183,
|
5816 |
+
"step": 484000
|
5817 |
+
},
|
5818 |
+
{
|
5819 |
+
"epoch": 8.25,
|
5820 |
+
"learning_rate": 1.0339747250503798e-05,
|
5821 |
+
"loss": 0.3185,
|
5822 |
+
"step": 484500
|
5823 |
+
},
|
5824 |
+
{
|
5825 |
+
"epoch": 8.26,
|
5826 |
+
"learning_rate": 1.0318450696414725e-05,
|
5827 |
+
"loss": 0.3186,
|
5828 |
+
"step": 485000
|
5829 |
}
|
5830 |
],
|
5831 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ccac6cfd193e438464e5b6e186f7cfb7a9fd45fac4373893748d8948cac56c4
|
3 |
size 201355195
|