Training in progress, step 400000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:935482101098fd1bcdba8ff4db7e80c3829f3028494140828e0a94cd2691c2d8
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bf63b57e9c92ad51945b78dcd295810ddc94ae491f8c80e57b169bae7716317
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:325753cffb70e35308b5d9f5fd9d5f81e4abc5575cc397e717b388a32e603120
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0a5f6b1461ae68dec64360dd3e057905146ceae77ab814e4a2b5b43cca29a7d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5791c011f9c77cc08334bc5642cc18df6e799e70037d743754cc941773ed51bd
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33d463e038379047a89525a90e8d432deeb676fbca369621cedef5ebd2be17cc
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e8834f32b5068af02b0ae35e7bc2e78bf21aef2c72704751d0c45945323a934
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c579cbfbdb05c1d3520e249d34e85627c452e7287efcbb543d8b7d39834bb0fc
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4746,6 +4746,66 @@
|
|
4746 |
"learning_rate": 2.473041898715981e-05,
|
4747 |
"loss": 0.3201,
|
4748 |
"step": 395000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4749 |
}
|
4750 |
],
|
4751 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.80850375741483,
|
5 |
+
"global_step": 400000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4746 |
"learning_rate": 2.473041898715981e-05,
|
4747 |
"loss": 0.3201,
|
4748 |
"step": 395000
|
4749 |
+
},
|
4750 |
+
{
|
4751 |
+
"epoch": 6.73,
|
4752 |
+
"learning_rate": 2.459600848399211e-05,
|
4753 |
+
"loss": 0.3203,
|
4754 |
+
"step": 395500
|
4755 |
+
},
|
4756 |
+
{
|
4757 |
+
"epoch": 6.74,
|
4758 |
+
"learning_rate": 2.4462142610782094e-05,
|
4759 |
+
"loss": 0.3203,
|
4760 |
+
"step": 396000
|
4761 |
+
},
|
4762 |
+
{
|
4763 |
+
"epoch": 6.75,
|
4764 |
+
"learning_rate": 2.4328556058264563e-05,
|
4765 |
+
"loss": 0.3203,
|
4766 |
+
"step": 396500
|
4767 |
+
},
|
4768 |
+
{
|
4769 |
+
"epoch": 6.76,
|
4770 |
+
"learning_rate": 2.419551896042324e-05,
|
4771 |
+
"loss": 0.3201,
|
4772 |
+
"step": 397000
|
4773 |
+
},
|
4774 |
+
{
|
4775 |
+
"epoch": 6.77,
|
4776 |
+
"learning_rate": 2.4063032630280625e-05,
|
4777 |
+
"loss": 0.3201,
|
4778 |
+
"step": 397500
|
4779 |
+
},
|
4780 |
+
{
|
4781 |
+
"epoch": 6.77,
|
4782 |
+
"learning_rate": 2.3931098375423278e-05,
|
4783 |
+
"loss": 0.3203,
|
4784 |
+
"step": 398000
|
4785 |
+
},
|
4786 |
+
{
|
4787 |
+
"epoch": 6.78,
|
4788 |
+
"learning_rate": 2.3799717497989015e-05,
|
4789 |
+
"loss": 0.3202,
|
4790 |
+
"step": 398500
|
4791 |
+
},
|
4792 |
+
{
|
4793 |
+
"epoch": 6.79,
|
4794 |
+
"learning_rate": 2.3668891294654094e-05,
|
4795 |
+
"loss": 0.3199,
|
4796 |
+
"step": 399000
|
4797 |
+
},
|
4798 |
+
{
|
4799 |
+
"epoch": 6.8,
|
4800 |
+
"learning_rate": 2.3538621056620283e-05,
|
4801 |
+
"loss": 0.3202,
|
4802 |
+
"step": 399500
|
4803 |
+
},
|
4804 |
+
{
|
4805 |
+
"epoch": 6.81,
|
4806 |
+
"learning_rate": 2.340890806960229e-05,
|
4807 |
+
"loss": 0.3201,
|
4808 |
+
"step": 400000
|
4809 |
}
|
4810 |
],
|
4811 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bf63b57e9c92ad51945b78dcd295810ddc94ae491f8c80e57b169bae7716317
|
3 |
size 201355195
|