Training in progress, step 86, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +89 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:030eaddc6f6e92696098f138d1d6f183ae1feab61caf59dad9121a005f8a739a
|
3 |
size 50624
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118090
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73e75b1442d6f68ae9ec14ea3d9bb12f9dedda9bc5c41de5f976e4bb49cd2095
|
3 |
size 118090
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66b4fed55d7442ec4f04242a594d7c198315a7d44290c90ffaea764d86aad661
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb08bb8a586d27643dcf86a9f8306def51a9136f80f5802d3aee00dc499c85b7
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41209214eef4e8962e9e3297d4c43c9981f0d838e4478bdc95bb660b06addb87
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c58fa9498e2c74f0eb38ae93ff5adde3fe07458cf51af1baaaa3f0aa96d3fb9
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3515ffb61bbd99b16f0cf41af74761fa2ee8d9e372c8ce4c68c7f0ba42572ed2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 8,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -612,6 +612,91 @@
|
|
612 |
"learning_rate": 5.080450905401057e-06,
|
613 |
"loss": 10.364,
|
614 |
"step": 75
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
615 |
}
|
616 |
],
|
617 |
"logging_steps": 1,
|
@@ -626,12 +711,12 @@
|
|
626 |
"should_evaluate": false,
|
627 |
"should_log": false,
|
628 |
"should_save": true,
|
629 |
-
"should_training_stop":
|
630 |
},
|
631 |
"attributes": {}
|
632 |
}
|
633 |
},
|
634 |
-
"total_flos":
|
635 |
"train_batch_size": 8,
|
636 |
"trial_name": null,
|
637 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.017543859649123,
|
5 |
"eval_steps": 8,
|
6 |
+
"global_step": 86,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
612 |
"learning_rate": 5.080450905401057e-06,
|
613 |
"loss": 10.364,
|
614 |
"step": 75
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 2.6666666666666665,
|
618 |
+
"grad_norm": 0.04788883775472641,
|
619 |
+
"learning_rate": 4.2113336672471245e-06,
|
620 |
+
"loss": 10.3642,
|
621 |
+
"step": 76
|
622 |
+
},
|
623 |
+
{
|
624 |
+
"epoch": 2.7017543859649122,
|
625 |
+
"grad_norm": 0.05305058881640434,
|
626 |
+
"learning_rate": 3.420445597436056e-06,
|
627 |
+
"loss": 10.3684,
|
628 |
+
"step": 77
|
629 |
+
},
|
630 |
+
{
|
631 |
+
"epoch": 2.736842105263158,
|
632 |
+
"grad_norm": 0.04230741783976555,
|
633 |
+
"learning_rate": 2.7091379149682685e-06,
|
634 |
+
"loss": 10.3681,
|
635 |
+
"step": 78
|
636 |
+
},
|
637 |
+
{
|
638 |
+
"epoch": 2.7719298245614032,
|
639 |
+
"grad_norm": 0.04892972111701965,
|
640 |
+
"learning_rate": 2.0786258770873647e-06,
|
641 |
+
"loss": 10.3648,
|
642 |
+
"step": 79
|
643 |
+
},
|
644 |
+
{
|
645 |
+
"epoch": 2.807017543859649,
|
646 |
+
"grad_norm": 0.05191851034760475,
|
647 |
+
"learning_rate": 1.5299867030334814e-06,
|
648 |
+
"loss": 10.3678,
|
649 |
+
"step": 80
|
650 |
+
},
|
651 |
+
{
|
652 |
+
"epoch": 2.807017543859649,
|
653 |
+
"eval_loss": 10.36169147491455,
|
654 |
+
"eval_runtime": 0.0484,
|
655 |
+
"eval_samples_per_second": 1983.67,
|
656 |
+
"eval_steps_per_second": 61.99,
|
657 |
+
"step": 80
|
658 |
+
},
|
659 |
+
{
|
660 |
+
"epoch": 2.8421052631578947,
|
661 |
+
"grad_norm": 0.05835256725549698,
|
662 |
+
"learning_rate": 1.064157733632276e-06,
|
663 |
+
"loss": 10.3622,
|
664 |
+
"step": 81
|
665 |
+
},
|
666 |
+
{
|
667 |
+
"epoch": 2.8771929824561404,
|
668 |
+
"grad_norm": 0.051547639071941376,
|
669 |
+
"learning_rate": 6.819348298638839e-07,
|
670 |
+
"loss": 10.3642,
|
671 |
+
"step": 82
|
672 |
+
},
|
673 |
+
{
|
674 |
+
"epoch": 2.912280701754386,
|
675 |
+
"grad_norm": 0.05024786293506622,
|
676 |
+
"learning_rate": 3.839710131477492e-07,
|
677 |
+
"loss": 10.3686,
|
678 |
+
"step": 83
|
679 |
+
},
|
680 |
+
{
|
681 |
+
"epoch": 2.9473684210526314,
|
682 |
+
"grad_norm": 0.05746513977646828,
|
683 |
+
"learning_rate": 1.7077534966650766e-07,
|
684 |
+
"loss": 10.3635,
|
685 |
+
"step": 84
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"epoch": 2.982456140350877,
|
689 |
+
"grad_norm": 0.05468269810080528,
|
690 |
+
"learning_rate": 4.2712080634949024e-08,
|
691 |
+
"loss": 10.3656,
|
692 |
+
"step": 85
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 3.017543859649123,
|
696 |
+
"grad_norm": 0.0812983587384224,
|
697 |
+
"learning_rate": 0.0,
|
698 |
+
"loss": 15.6424,
|
699 |
+
"step": 86
|
700 |
}
|
701 |
],
|
702 |
"logging_steps": 1,
|
|
|
711 |
"should_evaluate": false,
|
712 |
"should_log": false,
|
713 |
"should_save": true,
|
714 |
+
"should_training_stop": true
|
715 |
},
|
716 |
"attributes": {}
|
717 |
}
|
718 |
},
|
719 |
+
"total_flos": 17992041627648.0,
|
720 |
"train_batch_size": 8,
|
721 |
"trial_name": null,
|
722 |
"trial_params": null
|