Training in progress, step 40000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +51 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- runs/May14_15-54-58_fce7ca761ee9/1684081292.317363/events.out.tfevents.1684081292.fce7ca761ee9.347.1 +3 -0
- runs/May14_15-54-58_fce7ca761ee9/events.out.tfevents.1684081292.fce7ca761ee9.347.0 +3 -0
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3871543575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b69bc7657c41e490228da25e605f8d99e96a76d198387a1426d235fcd1484247
|
3 |
size 3871543575
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:531767387406a4a1adc98dcb379faedfd5d67970eb49105f8006c40fdfde280b
|
3 |
size 1944201353
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90784694101436cc26fa286742ac9e70f8a3dc8930b645d52069518e84d16286
|
3 |
size 14575
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6240fd3911fd3b1c79e44695b5135aaede3cad0c4630f7dafb03af6e91703a8
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c80aab68e2730aac66e1e517ede42881ca1bab8f2f86eebec191aa801b7fb666
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -438,11 +438,59 @@
|
|
438 |
"learning_rate": 4.982070995461684e-06,
|
439 |
"loss": 1.7599,
|
440 |
"step": 36000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
}
|
442 |
],
|
443 |
"max_steps": 943410,
|
444 |
"num_train_epochs": 10,
|
445 |
-
"total_flos": 2.
|
446 |
"trial_name": null,
|
447 |
"trial_params": null
|
448 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.42399380969037853,
|
5 |
+
"global_step": 40000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
438 |
"learning_rate": 4.982070995461684e-06,
|
439 |
"loss": 1.7599,
|
440 |
"step": 36000
|
441 |
+
},
|
442 |
+
{
|
443 |
+
"epoch": 0.39,
|
444 |
+
"learning_rate": 4.981570938885529e-06,
|
445 |
+
"loss": 1.7501,
|
446 |
+
"step": 36500
|
447 |
+
},
|
448 |
+
{
|
449 |
+
"epoch": 0.39,
|
450 |
+
"learning_rate": 4.9810630074375465e-06,
|
451 |
+
"loss": 1.7679,
|
452 |
+
"step": 37000
|
453 |
+
},
|
454 |
+
{
|
455 |
+
"epoch": 0.4,
|
456 |
+
"learning_rate": 4.980548197762693e-06,
|
457 |
+
"loss": 1.7317,
|
458 |
+
"step": 37500
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"epoch": 0.4,
|
462 |
+
"learning_rate": 4.980026511288172e-06,
|
463 |
+
"loss": 1.7846,
|
464 |
+
"step": 38000
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"epoch": 0.41,
|
468 |
+
"learning_rate": 4.979497949460249e-06,
|
469 |
+
"loss": 1.7632,
|
470 |
+
"step": 38500
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 0.41,
|
474 |
+
"learning_rate": 4.9789625137442505e-06,
|
475 |
+
"loss": 1.759,
|
476 |
+
"step": 39000
|
477 |
+
},
|
478 |
+
{
|
479 |
+
"epoch": 0.42,
|
480 |
+
"learning_rate": 4.97842020562456e-06,
|
481 |
+
"loss": 1.7392,
|
482 |
+
"step": 39500
|
483 |
+
},
|
484 |
+
{
|
485 |
+
"epoch": 0.42,
|
486 |
+
"learning_rate": 4.977872131818803e-06,
|
487 |
+
"loss": 1.768,
|
488 |
+
"step": 40000
|
489 |
}
|
490 |
],
|
491 |
"max_steps": 943410,
|
492 |
"num_train_epochs": 10,
|
493 |
+
"total_flos": 2.2568599797497856e+16,
|
494 |
"trial_name": null,
|
495 |
"trial_params": null
|
496 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae93eba663a7f91d7241c8950655122ed7e58342d8bf1eed79ca8f112367475f
|
3 |
size 3771
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1944201353
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:531767387406a4a1adc98dcb379faedfd5d67970eb49105f8006c40fdfde280b
|
3 |
size 1944201353
|
runs/May14_15-54-58_fce7ca761ee9/1684081292.317363/events.out.tfevents.1684081292.fce7ca761ee9.347.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2946190b54245655052ad83c09cf29893d173c88dacdb71d1382c2a553766284
|
3 |
+
size 6187
|
runs/May14_15-54-58_fce7ca761ee9/events.out.tfevents.1684081292.fce7ca761ee9.347.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:337fcb494b7a835e6e568b0925202605a87866f16c178c22af18e2dcf1debc4f
|
3 |
+
size 5743
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae93eba663a7f91d7241c8950655122ed7e58342d8bf1eed79ca8f112367475f
|
3 |
size 3771
|