Training in progress, step 420000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +62 -2
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402588883
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4930a5630150a11b50332fd15bf88b69dd42232bbdf14eecf7dc1e17b44a4dbe
|
3 |
size 402588883
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6254beaf2d9e3c9734b690d3b6c083e636ef9d7458e4ab7d5e2c554bd214921
|
3 |
size 201355195
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc602aaafaaee2301b384f9640da8e2e14582b8ba9808df99da9f8c7f2195081
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:658d25e9ca0d8fccef83b608e2bcf4cde0dd944efd57c5948cc4d84ead2558b4
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c69d90e3e609e630a4c0049edbaa5e451c76141342e8fd55320d068c21bdd77
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27b4af74bf166149790c7d38b3e4de7da32def4b6d67a817a9278c97b4f142e8
|
3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccc8e0205e9b2c583d574b288f447559ed65db515efe8ff002fa810d3779679f
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bb9c15ff69fd47e1ccbe4086f152edc12f237d5b1894782d50697ebb67243ef
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4986,6 +4986,66 @@
|
|
4986 |
"learning_rate": 1.978413491748704e-05,
|
4987 |
"loss": 0.3197,
|
4988 |
"step": 415000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4989 |
}
|
4990 |
],
|
4991 |
"max_steps": 500000,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.148934902681679,
|
5 |
+
"global_step": 420000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4986 |
"learning_rate": 1.978413491748704e-05,
|
4987 |
"loss": 0.3197,
|
4988 |
"step": 415000
|
4989 |
+
},
|
4990 |
+
{
|
4991 |
+
"epoch": 7.07,
|
4992 |
+
"learning_rate": 1.9672296776913344e-05,
|
4993 |
+
"loss": 0.3197,
|
4994 |
+
"step": 415500
|
4995 |
+
},
|
4996 |
+
{
|
4997 |
+
"epoch": 7.08,
|
4998 |
+
"learning_rate": 1.956105404641519e-05,
|
4999 |
+
"loss": 0.3195,
|
5000 |
+
"step": 416000
|
5001 |
+
},
|
5002 |
+
{
|
5003 |
+
"epoch": 7.09,
|
5004 |
+
"learning_rate": 1.945040782391339e-05,
|
5005 |
+
"loss": 0.3196,
|
5006 |
+
"step": 416500
|
5007 |
+
},
|
5008 |
+
{
|
5009 |
+
"epoch": 7.1,
|
5010 |
+
"learning_rate": 1.9340359201441538e-05,
|
5011 |
+
"loss": 0.3195,
|
5012 |
+
"step": 417000
|
5013 |
+
},
|
5014 |
+
{
|
5015 |
+
"epoch": 7.11,
|
5016 |
+
"learning_rate": 1.923090926513507e-05,
|
5017 |
+
"loss": 0.3193,
|
5018 |
+
"step": 417500
|
5019 |
+
},
|
5020 |
+
{
|
5021 |
+
"epoch": 7.11,
|
5022 |
+
"learning_rate": 1.912205909522066e-05,
|
5023 |
+
"loss": 0.3196,
|
5024 |
+
"step": 418000
|
5025 |
+
},
|
5026 |
+
{
|
5027 |
+
"epoch": 7.12,
|
5028 |
+
"learning_rate": 1.9013809766005588e-05,
|
5029 |
+
"loss": 0.3196,
|
5030 |
+
"step": 418500
|
5031 |
+
},
|
5032 |
+
{
|
5033 |
+
"epoch": 7.13,
|
5034 |
+
"learning_rate": 1.8906162345866986e-05,
|
5035 |
+
"loss": 0.3195,
|
5036 |
+
"step": 419000
|
5037 |
+
},
|
5038 |
+
{
|
5039 |
+
"epoch": 7.14,
|
5040 |
+
"learning_rate": 1.879911789724147e-05,
|
5041 |
+
"loss": 0.3196,
|
5042 |
+
"step": 419500
|
5043 |
+
},
|
5044 |
+
{
|
5045 |
+
"epoch": 7.15,
|
5046 |
+
"learning_rate": 1.8692889753936567e-05,
|
5047 |
+
"loss": 0.3195,
|
5048 |
+
"step": 420000
|
5049 |
}
|
5050 |
],
|
5051 |
"max_steps": 500000,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201355195
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6254beaf2d9e3c9734b690d3b6c083e636ef9d7458e4ab7d5e2c554bd214921
|
3 |
size 201355195
|