Training in progress, step 28000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +2 -2
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +51 -3
- pytorch_model.bin +1 -1
- runs/Jun18_17-52-03_85d30a282e1b/events.out.tfevents.1687112276.85d30a282e1b.283.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3871543575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72852881cb27f03d68aee156701d9ed4abca3e02139d1f3812166937936c6930
|
3 |
size 3871543575
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccfdfe1dd57bf2350531d07b5f6b9ea9f61b2ea739f511397abc3f3034b989bd
|
3 |
size 1944201353
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4991eb3f924760cf4a9e1e172b7a6f137205a9c97add30e32a7b852475b57b1c
|
3 |
+
size 14511
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efb484ebd13275de377f305282b7e5e723e42ba1a1a11aeb0a9898c9fca5c12c
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:303647ce6cd04dfc3f358ec168c62515611763039c425f1bf0082c6ae4113cb6
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -294,11 +294,59 @@
|
|
294 |
"learning_rate": 1.5149282428923398e-05,
|
295 |
"loss": 1.3745,
|
296 |
"step": 24000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
}
|
298 |
],
|
299 |
"max_steps": 38148,
|
300 |
"num_train_epochs": 1,
|
301 |
-
"total_flos":
|
302 |
"trial_name": null,
|
303 |
"trial_params": null
|
304 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.733975736859213,
|
5 |
+
"global_step": 28000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
294 |
"learning_rate": 1.5149282428923398e-05,
|
295 |
"loss": 1.3745,
|
296 |
"step": 24000
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.64,
|
300 |
+
"learning_rate": 1.4211771280398977e-05,
|
301 |
+
"loss": 1.3667,
|
302 |
+
"step": 24500
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"epoch": 0.66,
|
306 |
+
"learning_rate": 1.3292548922523571e-05,
|
307 |
+
"loss": 1.3644,
|
308 |
+
"step": 25000
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"epoch": 0.67,
|
312 |
+
"learning_rate": 1.2393173670911351e-05,
|
313 |
+
"loss": 1.338,
|
314 |
+
"step": 25500
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 0.68,
|
318 |
+
"learning_rate": 1.1515170195285154e-05,
|
319 |
+
"loss": 1.3728,
|
320 |
+
"step": 26000
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"epoch": 0.69,
|
324 |
+
"learning_rate": 1.0660026934773603e-05,
|
325 |
+
"loss": 1.3354,
|
326 |
+
"step": 26500
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"epoch": 0.71,
|
330 |
+
"learning_rate": 9.82919357462835e-06,
|
331 |
+
"loss": 1.342,
|
332 |
+
"step": 27000
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"epoch": 0.72,
|
336 |
+
"learning_rate": 9.024078588639035e-06,
|
337 |
+
"loss": 1.3398,
|
338 |
+
"step": 27500
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.73,
|
342 |
+
"learning_rate": 8.246046851412012e-06,
|
343 |
+
"loss": 1.3244,
|
344 |
+
"step": 28000
|
345 |
}
|
346 |
],
|
347 |
"max_steps": 38148,
|
348 |
"num_train_epochs": 1,
|
349 |
+
"total_flos": 3.0843724697985024e+16,
|
350 |
"trial_name": null,
|
351 |
"trial_params": null
|
352 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccfdfe1dd57bf2350531d07b5f6b9ea9f61b2ea739f511397abc3f3034b989bd
|
3 |
size 1944201353
|
runs/Jun18_17-52-03_85d30a282e1b/events.out.tfevents.1687112276.85d30a282e1b.283.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95b59199deca4c67e4efb69b372b3968be640ed5eb995cd37ecb0b21502dbdc1
|
3 |
+
size 7022
|