checkpoint 9000
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +85 -5
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262032932
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a61e6e5352efdd5b7f9da781d485d41785a65d97707d8a50f8ed3703500e382
|
3 |
size 1262032932
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490610241
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d61d798a6ebe2df9f0c91788f23fc918b91a4b2bf0166521c19127eccff65502
|
3 |
size 2490610241
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14639
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70d3644a4cf26f14e27a5301570cd0ec97d359dfc1ab88070196c7de228b37c2
|
3 |
size 14639
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fb799b5054cd707634c1757db8b87f65d5adfd51f621867293ee2d7170c2937
|
3 |
size 627
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./working/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -647,6 +647,86 @@
|
|
647 |
"eval_steps_per_second": 4.029,
|
648 |
"eval_wer": 0.6170212765957447,
|
649 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
650 |
}
|
651 |
],
|
652 |
"logging_steps": 100,
|
@@ -654,7 +734,7 @@
|
|
654 |
"num_input_tokens_seen": 0,
|
655 |
"num_train_epochs": 120,
|
656 |
"save_steps": 1000,
|
657 |
-
"total_flos": 2.
|
658 |
"train_batch_size": 8,
|
659 |
"trial_name": null,
|
660 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.3642527461051941,
|
3 |
+
"best_model_checkpoint": "./working/checkpoint-9000",
|
4 |
+
"epoch": 107.14285714285714,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 9000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
647 |
"eval_steps_per_second": 4.029,
|
648 |
"eval_wer": 0.6170212765957447,
|
649 |
"step": 8000
|
650 |
+
},
|
651 |
+
{
|
652 |
+
"epoch": 96.43,
|
653 |
+
"grad_norm": 2.8603055477142334,
|
654 |
+
"learning_rate": 7.14e-06,
|
655 |
+
"loss": 0.9576,
|
656 |
+
"step": 8100
|
657 |
+
},
|
658 |
+
{
|
659 |
+
"epoch": 97.62,
|
660 |
+
"grad_norm": 2.931117296218872,
|
661 |
+
"learning_rate": 6.7650000000000005e-06,
|
662 |
+
"loss": 0.9579,
|
663 |
+
"step": 8200
|
664 |
+
},
|
665 |
+
{
|
666 |
+
"epoch": 98.81,
|
667 |
+
"grad_norm": 3.449780225753784,
|
668 |
+
"learning_rate": 6.39e-06,
|
669 |
+
"loss": 0.9535,
|
670 |
+
"step": 8300
|
671 |
+
},
|
672 |
+
{
|
673 |
+
"epoch": 100.0,
|
674 |
+
"grad_norm": 4.3435468673706055,
|
675 |
+
"learning_rate": 6.015000000000001e-06,
|
676 |
+
"loss": 0.9463,
|
677 |
+
"step": 8400
|
678 |
+
},
|
679 |
+
{
|
680 |
+
"epoch": 101.19,
|
681 |
+
"grad_norm": 2.2839837074279785,
|
682 |
+
"learning_rate": 5.64e-06,
|
683 |
+
"loss": 0.9413,
|
684 |
+
"step": 8500
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"epoch": 102.38,
|
688 |
+
"grad_norm": 3.1021485328674316,
|
689 |
+
"learning_rate": 5.2649999999999996e-06,
|
690 |
+
"loss": 0.9436,
|
691 |
+
"step": 8600
|
692 |
+
},
|
693 |
+
{
|
694 |
+
"epoch": 103.57,
|
695 |
+
"grad_norm": 2.9421229362487793,
|
696 |
+
"learning_rate": 4.890000000000001e-06,
|
697 |
+
"loss": 0.939,
|
698 |
+
"step": 8700
|
699 |
+
},
|
700 |
+
{
|
701 |
+
"epoch": 104.76,
|
702 |
+
"grad_norm": 2.0578436851501465,
|
703 |
+
"learning_rate": 4.515e-06,
|
704 |
+
"loss": 0.9338,
|
705 |
+
"step": 8800
|
706 |
+
},
|
707 |
+
{
|
708 |
+
"epoch": 105.95,
|
709 |
+
"grad_norm": 3.5860297679901123,
|
710 |
+
"learning_rate": 4.14e-06,
|
711 |
+
"loss": 0.9315,
|
712 |
+
"step": 8900
|
713 |
+
},
|
714 |
+
{
|
715 |
+
"epoch": 107.14,
|
716 |
+
"grad_norm": 2.1002159118652344,
|
717 |
+
"learning_rate": 3.765e-06,
|
718 |
+
"loss": 0.9279,
|
719 |
+
"step": 9000
|
720 |
+
},
|
721 |
+
{
|
722 |
+
"epoch": 107.14,
|
723 |
+
"eval_cer": 0.13245100020206102,
|
724 |
+
"eval_loss": 0.3642527461051941,
|
725 |
+
"eval_runtime": 10.5077,
|
726 |
+
"eval_samples_per_second": 31.976,
|
727 |
+
"eval_steps_per_second": 3.997,
|
728 |
+
"eval_wer": 0.6248053969901401,
|
729 |
+
"step": 9000
|
730 |
}
|
731 |
],
|
732 |
"logging_steps": 100,
|
|
|
734 |
"num_input_tokens_seen": 0,
|
735 |
"num_train_epochs": 120,
|
736 |
"save_steps": 1000,
|
737 |
+
"total_flos": 2.7115316842745717e+19,
|
738 |
"train_batch_size": 8,
|
739 |
"trial_name": null,
|
740 |
"trial_params": null
|