cp 17600
Browse files- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- scheduler.pt +1 -1
- trainer_state.json +101 -3
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490339591
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dc817f3687769e04e797f4a2d65e705085e218a0e605bfbc439368268116963
|
3 |
size 2490339591
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262065048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9fe9ece7c8d596406cb8184e772cd600ac8713a89bd8b7cfcd2b5fcf1aef922
|
3 |
size 1262065048
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b8d971c35266efc366e4ed2b2f0e0bd8cccd0cc38cab1592331781a3b1c85ea
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -524,11 +524,109 @@
|
|
524 |
"eval_samples_per_second": 7.977,
|
525 |
"eval_wer": 0.3517199017199017,
|
526 |
"step": 14800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
527 |
}
|
528 |
],
|
529 |
"max_steps": 29490,
|
530 |
"num_train_epochs": 30,
|
531 |
-
"total_flos":
|
532 |
"trial_name": null,
|
533 |
"trial_params": null
|
534 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 17.904374364191252,
|
5 |
+
"global_step": 17600,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
524 |
"eval_samples_per_second": 7.977,
|
525 |
"eval_wer": 0.3517199017199017,
|
526 |
"step": 14800
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"epoch": 15.46,
|
530 |
+
"learning_rate": 0.00014787857882028285,
|
531 |
+
"loss": 0.0717,
|
532 |
+
"step": 15200
|
533 |
+
},
|
534 |
+
{
|
535 |
+
"epoch": 15.46,
|
536 |
+
"eval_loss": 0.49670103192329407,
|
537 |
+
"eval_runtime": 236.0843,
|
538 |
+
"eval_samples_per_second": 7.925,
|
539 |
+
"eval_wer": 0.3600737100737101,
|
540 |
+
"step": 15200
|
541 |
+
},
|
542 |
+
{
|
543 |
+
"epoch": 15.87,
|
544 |
+
"learning_rate": 0.00014373922042083476,
|
545 |
+
"loss": 0.0708,
|
546 |
+
"step": 15600
|
547 |
+
},
|
548 |
+
{
|
549 |
+
"epoch": 15.87,
|
550 |
+
"eval_loss": 0.46058785915374756,
|
551 |
+
"eval_runtime": 242.6156,
|
552 |
+
"eval_samples_per_second": 7.712,
|
553 |
+
"eval_wer": 0.35982800982800983,
|
554 |
+
"step": 15600
|
555 |
+
},
|
556 |
+
{
|
557 |
+
"epoch": 16.28,
|
558 |
+
"learning_rate": 0.00013959986202138666,
|
559 |
+
"loss": 0.0673,
|
560 |
+
"step": 16000
|
561 |
+
},
|
562 |
+
{
|
563 |
+
"epoch": 16.28,
|
564 |
+
"eval_loss": 0.45084264874458313,
|
565 |
+
"eval_runtime": 234.0195,
|
566 |
+
"eval_samples_per_second": 7.995,
|
567 |
+
"eval_wer": 0.3546683046683047,
|
568 |
+
"step": 16000
|
569 |
+
},
|
570 |
+
{
|
571 |
+
"epoch": 16.68,
|
572 |
+
"learning_rate": 0.0001354605036219386,
|
573 |
+
"loss": 0.0664,
|
574 |
+
"step": 16400
|
575 |
+
},
|
576 |
+
{
|
577 |
+
"epoch": 16.68,
|
578 |
+
"eval_loss": 0.4838450849056244,
|
579 |
+
"eval_runtime": 237.8079,
|
580 |
+
"eval_samples_per_second": 7.868,
|
581 |
+
"eval_wer": 0.35614250614250614,
|
582 |
+
"step": 16400
|
583 |
+
},
|
584 |
+
{
|
585 |
+
"epoch": 17.09,
|
586 |
+
"learning_rate": 0.0001313211452224905,
|
587 |
+
"loss": 0.0639,
|
588 |
+
"step": 16800
|
589 |
+
},
|
590 |
+
{
|
591 |
+
"epoch": 17.09,
|
592 |
+
"eval_loss": 0.4703587591648102,
|
593 |
+
"eval_runtime": 235.9394,
|
594 |
+
"eval_samples_per_second": 7.93,
|
595 |
+
"eval_wer": 0.35657248157248156,
|
596 |
+
"step": 16800
|
597 |
+
},
|
598 |
+
{
|
599 |
+
"epoch": 17.5,
|
600 |
+
"learning_rate": 0.0001271817868230424,
|
601 |
+
"loss": 0.0597,
|
602 |
+
"step": 17200
|
603 |
+
},
|
604 |
+
{
|
605 |
+
"epoch": 17.5,
|
606 |
+
"eval_loss": 0.47286155819892883,
|
607 |
+
"eval_runtime": 237.4364,
|
608 |
+
"eval_samples_per_second": 7.88,
|
609 |
+
"eval_wer": 0.35657248157248156,
|
610 |
+
"step": 17200
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"epoch": 17.9,
|
614 |
+
"learning_rate": 0.00012304242842359434,
|
615 |
+
"loss": 0.068,
|
616 |
+
"step": 17600
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"epoch": 17.9,
|
620 |
+
"eval_loss": 0.42778506875038147,
|
621 |
+
"eval_runtime": 235.8032,
|
622 |
+
"eval_samples_per_second": 7.935,
|
623 |
+
"eval_wer": 0.3484029484029484,
|
624 |
+
"step": 17600
|
625 |
}
|
626 |
],
|
627 |
"max_steps": 29490,
|
628 |
"num_train_epochs": 30,
|
629 |
+
"total_flos": 9.766671614435893e+19,
|
630 |
"trial_name": null,
|
631 |
"trial_params": null
|
632 |
}
|