Training in progress, step 16000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +2 -2
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +51 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/Jun16_15-16-02_9a4f5f66b33d/1686930096.7225094/events.out.tfevents.1686930096.9a4f5f66b33d.224.1 +3 -0
- runs/Jun16_15-16-02_9a4f5f66b33d/events.out.tfevents.1686930096.9a4f5f66b33d.224.0 +3 -0
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3871543575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc082dac1aeb46f9ac55ca99c65385b0e240df4c8f1cc27ede41faf0e12d5ff0
|
3 |
size 3871543575
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b54e83455838b8285a80087d91dcce55188194c5a5c7c15f55ed356fc18324e1
|
3 |
size 1944201353
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e300d124805e85d165696b1ab015335ceda48c16b90205e1b09c9d3b63767971
|
3 |
+
size 14575
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d79de33a1dadebecdc6710fbbebc54394b36ff40dc16ce7c4b9cd325b8d41b2
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f148927d78bff2b3194a8dac67d57033d9ed30fb38f0b33bdb5e3be6ecfc774
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -150,11 +150,59 @@
|
|
150 |
"learning_rate": 3.876275184846879e-05,
|
151 |
"loss": 1.4809,
|
152 |
"step": 12000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
}
|
154 |
],
|
155 |
"max_steps": 38148,
|
156 |
"num_train_epochs": 1,
|
157 |
-
"total_flos": 1.
|
158 |
"trial_name": null,
|
159 |
"trial_params": null
|
160 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.4194147067766931,
|
5 |
+
"global_step": 16000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
150 |
"learning_rate": 3.876275184846879e-05,
|
151 |
"loss": 1.4809,
|
152 |
"step": 12000
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 0.33,
|
156 |
+
"learning_rate": 3.78919487009023e-05,
|
157 |
+
"loss": 1.4744,
|
158 |
+
"step": 12500
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"epoch": 0.34,
|
162 |
+
"learning_rate": 3.6999290422435554e-05,
|
163 |
+
"loss": 1.4659,
|
164 |
+
"step": 13000
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"epoch": 0.35,
|
168 |
+
"learning_rate": 3.608629029581408e-05,
|
169 |
+
"loss": 1.4679,
|
170 |
+
"step": 13500
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.37,
|
174 |
+
"learning_rate": 3.5154496088388036e-05,
|
175 |
+
"loss": 1.4721,
|
176 |
+
"step": 14000
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 0.38,
|
180 |
+
"learning_rate": 3.420548742825304e-05,
|
181 |
+
"loss": 1.4471,
|
182 |
+
"step": 14500
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"epoch": 0.39,
|
186 |
+
"learning_rate": 3.3240873126378964e-05,
|
187 |
+
"loss": 1.4757,
|
188 |
+
"step": 15000
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 0.41,
|
192 |
+
"learning_rate": 3.226228844926621e-05,
|
193 |
+
"loss": 1.4617,
|
194 |
+
"step": 15500
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 0.42,
|
198 |
+
"learning_rate": 3.1271392346753284e-05,
|
199 |
+
"loss": 1.4551,
|
200 |
+
"step": 16000
|
201 |
}
|
202 |
],
|
203 |
"max_steps": 38148,
|
204 |
"num_train_epochs": 1,
|
205 |
+
"total_flos": 1.7648755970310144e+16,
|
206 |
"trial_name": null,
|
207 |
"trial_params": null
|
208 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a91745d5c34dd898b0980ec93cf90caddb5b2af7b8791e7a7000ac13519890d
|
3 |
size 3771
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b54e83455838b8285a80087d91dcce55188194c5a5c7c15f55ed356fc18324e1
|
3 |
size 1944201353
|
runs/Jun16_15-16-02_9a4f5f66b33d/1686930096.7225094/events.out.tfevents.1686930096.9a4f5f66b33d.224.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2df17be65fa5f27aacbcaced672ce08c7bf7ad919618c5ffd78891426170638
|
3 |
+
size 6187
|
runs/Jun16_15-16-02_9a4f5f66b33d/events.out.tfevents.1686930096.9a4f5f66b33d.224.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3eef1ee6af72ade29844b19974c89874a0bb78d9690d67bd7b44fde532ddd478
|
3 |
+
size 5718
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a91745d5c34dd898b0980ec93cf90caddb5b2af7b8791e7a7000ac13519890d
|
3 |
size 3771
|