Training in progress, epoch 14
Browse files- logs/events.out.tfevents.1715485378.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +33 -1
logs/events.out.tfevents.1715485378.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a08917b69d69a8bcb33026aa6faff82e604a63af0185065fa8c1a1a35d177c5b
|
3 |
+
size 95277
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:286433d8f0a21a70021e2594853cacd326b07647114b4edab6752ee956a30794
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -618,4 +618,36 @@ command outputs:
|
|
618 |
|
619 |
92%|ββββββββββ| 9850/10682 [1:23:33<06:49, 2.03it/s]
|
620 |
92%|ββββββββββ| 9851/10682 [1:23:34<06:49, 2.03it/s]
|
621 |
92%|ββββββββββ| 9852/10682 [1:23:34<06:49, 2.03it/s]
|
622 |
92%|ββββββββββ| 9853/10682 [1:23:35<06:49, 2.03it/s]
|
623 |
92%|ββββββββββ| 9854/10682 [1:23:35<06:48, 2.03it/s]
|
624 |
92%|ββββββββββ| 9855/10682 [1:23:36<06:48, 2.03it/s]
|
625 |
92%|ββββββββββ| 9856/10682 [1:23:36<06:47, 2.03it/s]
|
626 |
92%|ββββββββββ| 9857/10682 [1:23:37<06:46, 2.03it/s]
|
627 |
92%|ββββββββββ| 9858/10682 [1:23:37<06:46, 2.03it/s]
|
628 |
92%|ββββββββββ| 9859/10682 [1:23:38<06:45, 2.03it/s]
|
629 |
92%|ββββββββββ| 9860/10682 [1:23:38<06:44, 2.03it/s]
|
630 |
92%|ββββββββββ| 9861/10682 [1:23:39<06:44, 2.03it/s]
|
631 |
92%|ββββββββββ| 9862/10682 [1:23:39<06:43, 2.03it/s]
|
632 |
92%|ββββββββββ| 9863/10682 [1:23:40<06:43, 2.03it/s]
|
633 |
92%|ββββββββββ| 9864/10682 [1:23:40<06:42, 2.03it/s]
|
634 |
92%|ββββββββββ| 9865/10682 [1:23:41<06:42, 2.03it/s]
|
635 |
92%|ββββββββββ| 9866/10682 [1:23:41<06:42, 2.03it/s]
|
636 |
92%|ββββββββββ| 9867/10682 [1:23:42<06:41, 2.03it/s]
|
637 |
92%|ββββββββββ| 9868/10682 [1:23:42<06:41, 2.03it/s]
|
638 |
92%|ββββββββββ| 9869/10682 [1:23:43<06:40, 2.03it/s]
|
639 |
92%|ββββββββββ| 9870/10682 [1:23:43<06:39, 2.03it/s]
|
640 |
92%|ββββββββββ| 9871/10682 [1:23:44<06:39, 2.03it/s]
|
641 |
92%|ββββββββββ| 9872/10682 [1:23:44<06:38, 2.03it/s]
|
642 |
92%|ββββββββββ| 9873/10682 [1:23:45<06:38, 2.03it/s]
|
643 |
92%|ββββββββββ| 9874/10682 [1:23:45<06:38, 2.03it/s]
|
644 |
92%|ββββββββββ| 9875/10682 [1:23:46<06:37, 2.03it/s]
|
645 |
{'loss': 2.8104, 'grad_norm': 0.26470834016799927, 'learning_rate': 1.7288216615031272e-05, 'epoch': 12.94}
|
646 |
|
647 |
92%|ββββββββββ| 9875/10682 [1:23:46<06:37, 2.03it/s]
|
648 |
92%|ββββββββββ| 9876/10682 [1:23:46<06:37, 2.03it/s]
|
649 |
92%|ββββββββββ| 9877/10682 [1:23:47<06:36, 2.03it/s]
|
650 |
92%|ββββββββββ| 9878/10682 [1:23:47<06:36, 2.03it/s]
|
651 |
92%|ββββββββββ| 9879/10682 [1:23:48<06:35, 2.03it/s]
|
652 |
92%|ββββββββββ| 9880/10682 [1:23:48<06:35, 2.03it/s]
|
653 |
93%|ββββββββββ| 9881/10682 [1:23:49<06:34, 2.03it/s]
|
654 |
93%|ββββββββββ| 9882/10682 [1:23:49<06:34, 2.03it/s]
|
655 |
93%|ββββββββββ| 9883/10682 [1:23:50<06:34, 2.03it/s]
|
656 |
93%|ββββββββββ| 9884/10682 [1:23:50<06:33, 2.03it/s]
|
657 |
93%|ββββββββββ| 9885/10682 [1:23:51<06:33, 2.03it/s]
|
658 |
93%|ββββββββββ| 9886/10682 [1:23:51<06:32, 2.03it/s]
|
659 |
93%|ββββββββββ| 9887/10682 [1:23:52<06:32, 2.03it/s]
|
660 |
93%|ββββββββββ| 9888/10682 [1:23:52<06:31, 2.03it/s]
|
661 |
93%|ββββββββββ| 9889/10682 [1:23:53<06:30, 2.03it/s]
|
662 |
93%|ββββββββββ| 9890/10682 [1:23:53<06:30, 2.03it/s]
|
663 |
93%|ββββββββββ| 9891/10682 [1:23:54<06:29, 2.03it/s]
|
664 |
93%|ββββββββββ| 9892/10682 [1:23:54<06:29, 2.03it/s]
|
665 |
93%|ββββββββββ| 9893/10682 [1:23:55<06:29, 2.03it/s]
|
666 |
93%|ββββββββββ| 9894/10682 [1:23:55<06:28, 2.03it/s]
|
667 |
93%|ββββββββββ| 9895/10682 [1:23:56<06:28, 2.03it/s]
|
668 |
93%|ββββββββββ| 9896/10682 [1:23:56<06:27, 2.03it/s]
|
669 |
93%|ββββββββββ| 9897/10682 [1:23:57<06:27, 2.03it/s]
|
670 |
93%|ββββββββββ| 9898/10682 [1:23:57<06:26, 2.03it/s]
|
671 |
93%|ββββββββββ| 9899/10682 [1:23:58<06:25, 2.03it/s]
|
672 |
93%|ββββββββββ| 9900/10682 [1:23:58<06:25, 2.03it/s]
|
673 |
|
674 |
|
675 |
93%|ββββββββββ| 9900/10682 [1:23:58<06:25, 2.03it/s]
|
676 |
93%|ββββββββββ| 9901/10682 [1:23:59<06:25, 2.02it/s]
|
677 |
93%|ββββββββββ| 9902/10682 [1:23:59<06:24, 2.03it/s]
|
678 |
93%|ββββββββββ| 9903/10682 [1:24:00<06:24, 2.03it/s]
|
679 |
93%|ββββββββββ| 9904/10682 [1:24:00<06:23, 2.03it/s]
|
680 |
93%|ββββββββββ| 9905/10682 [1:24:01<06:22, 2.03it/s]
|
681 |
93%|ββββββββββ| 9906/10682 [1:24:01<06:22, 2.03it/s]
|
682 |
93%|ββββββββββ| 9907/10682 [1:24:02<06:21, 2.03it/s]
|
683 |
93%|ββββββββββ| 9908/10682 [1:24:02<06:20, 2.03it/s]
|
684 |
93%|ββββββββββ| 9909/10682 [1:24:03<06:21, 2.03it/s]
|
685 |
93%|ββββββββββ| 9910/10682 [1:24:03<06:20, 2.03it/s]
|
686 |
93%|ββββββββββ| 9911/10682 [1:24:04<06:20, 2.03it/s]
|
687 |
93%|ββββββββββ| 9912/10682 [1:24:04<06:19, 2.03it/s]
|
688 |
93%|ββββββββββ| 9913/10682 [1:24:05<06:18, 2.03it/s]
|
689 |
93%|ββββββββββ| 9914/10682 [1:24:05<06:18, 2.03it/s]
|
690 |
93%|ββββββββββ| 9915/10682 [1:24:05<06:17, 2.03it/s]
|
691 |
93%|ββββββββββ| 9916/10682 [1:24:06<06:17, 2.03it/s]
|
692 |
93%|ββββββββββ| 9917/10682 [1:24:06<06:16, 2.03it/s]
|
693 |
93%|ββββββββββ| 9918/10682 [1:24:07<06:16, 2.03it/s]
|
694 |
93%|ββββββββββ| 9919/10682 [1:24:07<06:12, 2.05it/s]
|
695 |
93%|ββββββββββ| 9920/10682 [1:24:20<51:57, 4.09s/it]
|
696 |
93%|ββββββββββ| 9921/10682 [1:24:20<38:11, 3.01s/it]
|
697 |
93%|ββββββββββ| 9922/10682 [1:24:21<28:34, 2.26s/it]
|
698 |
93%|ββββββββββ| 9923/10682 [1:24:21<21:52, 1.73s/it]
|
699 |
93%|ββββββββββ| 9924/10682 [1:24:22<17:09, 1.36s/it]
|
700 |
93%|ββββββββββ| 9925/10682 [1:24:22<13:50, 1.10s/it]{'loss': 2.8021, 'grad_norm': 0.26082977652549744, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.01}
|
701 |
|
702 |
-
|
703 |
93%|ββββββββββ| 9925/10682 [1:24:22<13:50, 1.10s/it]
|
704 |
93%|ββββββββββ| 9926/10682 [1:24:23<11:33, 1.09it/s]
|
705 |
93%|ββββββββββ| 9927/10682 [1:24:23<09:56, 1.27it/s]
|
706 |
93%|ββββββββββ| 9928/10682 [1:24:24<08:49, 1.43it/s]
|
707 |
93%|ββββββββββ| 9929/10682 [1:24:24<08:01, 1.56it/s]
|
708 |
93%|ββββββββββ| 9930/10682 [1:24:25<07:27, 1.68it/s]
|
709 |
93%|ββββββββββ| 9931/10682 [1:24:25<07:04, 1.77it/s]
|
710 |
93%|ββββββββββ| 9932/10682 [1:24:26<06:47, 1.84it/s]
|
711 |
93%|ββββββββββ| 9933/10682 [1:24:26<06:35, 1.89it/s]
|
712 |
93%|ββββββββββ| 9934/10682 [1:24:27<06:27, 1.93it/s]
|
713 |
93%|ββββββββββ| 9935/10682 [1:24:27<06:21, 1.96it/s]
|
714 |
93%|ββββββββββ| 9936/10682 [1:24:28<06:17, 1.98it/s]
|
715 |
93%|ββββββββββ| 9937/10682 [1:24:28<06:14, 1.99it/s]
|
716 |
93%|ββββββββββ| 9938/10682 [1:24:29<06:11, 2.00it/s]
|
717 |
93%|ββββββββββ| 9939/10682 [1:24:29<06:10, 2.01it/s]
|
718 |
93%|ββββββββββ| 9940/10682 [1:24:30<06:08, 2.01it/s]
|
719 |
93%|ββββββββββ| 9941/10682 [1:24:30<06:07, 2.01it/s]
|
720 |
93%|ββββββββββ| 9942/10682 [1:24:31<06:06, 2.02it/s]
|
721 |
93%|ββββββββββ| 9943/10682 [1:24:31<06:07, 2.01it/s]
|
722 |
93%|ββββββββββ| 9944/10682 [1:24:32<06:06, 2.01it/s]
|
723 |
93%|ββββββββββ| 9945/10682 [1:24:32<06:05, 2.02it/s]
|
724 |
93%|ββββββββββ| 9946/10682 [1:24:33<06:04, 2.02it/s]
|
725 |
93%|ββββββββββ| 9947/10682 [1:24:33<06:03, 2.02it/s]
|
|
|
726 |
93%|ββββββββββ| 9925/10682 [1:24:22<13:50, 1.10s/it]
|
727 |
93%|ββββββββββ| 9926/10682 [1:24:23<11:33, 1.09it/s]
|
728 |
93%|ββββββββββ| 9927/10682 [1:24:23<09:56, 1.27it/s]
|
729 |
93%|ββββββββββ| 9928/10682 [1:24:24<08:49, 1.43it/s]
|
730 |
93%|ββββββββββ| 9929/10682 [1:24:24<08:01, 1.56it/s]
|
731 |
93%|ββββββββββ| 9930/10682 [1:24:25<07:27, 1.68it/s]
|
732 |
93%|ββββββββββ| 9931/10682 [1:24:25<07:04, 1.77it/s]
|
733 |
93%|ββββββββββ| 9932/10682 [1:24:26<06:47, 1.84it/s]
|
734 |
93%|ββββββββββ| 9933/10682 [1:24:26<06:35, 1.89it/s]
|
735 |
93%|ββββββββββ| 9934/10682 [1:24:27<06:27, 1.93it/s]
|
736 |
93%|ββββββββββ| 9935/10682 [1:24:27<06:21, 1.96it/s]
|
737 |
93%|ββββββββββ| 9936/10682 [1:24:28<06:17, 1.98it/s]
|
738 |
93%|ββββββββββ| 9937/10682 [1:24:28<06:14, 1.99it/s]
|
739 |
93%|ββββββββββ| 9938/10682 [1:24:29<06:11, 2.00it/s]
|
740 |
93%|ββββββββββ| 9939/10682 [1:24:29<06:10, 2.01it/s]
|
741 |
93%|ββββββββββ| 9940/10682 [1:24:30<06:08, 2.01it/s]
|
742 |
93%|ββββββββββ| 9941/10682 [1:24:30<06:07, 2.01it/s]
|
743 |
93%|ββββββββββ| 9942/10682 [1:24:31<06:06, 2.02it/s]
|
744 |
93%|ββββββββββ| 9943/10682 [1:24:31<06:07, 2.01it/s]
|
745 |
93%|ββββββββββ| 9944/10682 [1:24:32<06:06, 2.01it/s]
|
746 |
93%|ββββββββββ| 9945/10682 [1:24:32<06:05, 2.02it/s]
|
747 |
93%|ββββββββββ| 9946/10682 [1:24:33<06:04, 2.02it/s]
|
748 |
93%|ββββββββββ| 9947/10682 [1:24:33<06:03, 2.02it/s]
|
749 |
93%|ββββββββββ| 9948/10682 [1:24:34<06:02, 2.03it/s]
|
750 |
93%|ββββββββββ| 9949/10682 [1:24:34<06:01, 2.03it/s]
|
751 |
93%|ββββββββββ| 9950/10682 [1:24:35<06:00, 2.03it/s]{'loss': 2.7759, 'grad_norm': 0.26722249388694763, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.04}
|
752 |
|
|
|
753 |
93%|ββββββββββ| 9950/10682 [1:24:35<06:00, 2.03it/s]
|
754 |
93%|ββββββββββ| 9951/10682 [1:24:35<06:01, 2.02it/s]
|
755 |
93%|ββββββββββ| 9952/10682 [1:24:36<06:00, 2.03it/s]
|
756 |
93%|ββββββββββ| 9953/10682 [1:24:36<05:59, 2.03it/s]
|
757 |
93%|ββββββββββ| 9954/10682 [1:24:37<05:59, 2.03it/s]
|
758 |
93%|ββββββββββ| 9955/10682 [1:24:37<05:58, 2.03it/s]
|
759 |
93%|ββββββββββ| 9956/10682 [1:24:38<05:58, 2.03it/s]
|
760 |
93%|ββββββββββ| 9957/10682 [1:24:38<05:57, 2.03it/s]
|
761 |
93%|ββββββββββ| 9958/10682 [1:24:39<05:56, 2.03it/s]
|
762 |
93%|ββββββββββ| 9959/10682 [1:24:39<05:56, 2.03it/s]
|
763 |
93%|ββββββββββ| 9960/10682 [1:24:40<05:55, 2.03it/s]
|
764 |
93%|ββββββββββ| 9961/10682 [1:24:40<05:55, 2.03it/s]
|
765 |
93%|ββββββββββ| 9962/10682 [1:24:41<05:54, 2.03it/s]
|
766 |
93%|ββββββββββ| 9963/10682 [1:24:41<05:54, 2.03it/s]
|
767 |
93%|ββββββββββ| 9964/10682 [1:24:42<05:53, 2.03it/s]
|
768 |
93%|ββββββββββ| 9965/10682 [1:24:42<05:53, 2.03it/s]
|
769 |
93%|ββββββββββ| 9966/10682 [1:24:43<05:52, 2.03it/s]
|
770 |
93%|ββββββββββ| 9967/10682 [1:24:43<05:51, 2.03it/s]
|
771 |
93%|ββββββββββ| 9968/10682 [1:24:44<05:51, 2.03it/s]
|
772 |
93%|ββββββββββ| 9969/10682 [1:24:44<05:50, 2.03it/s]
|
773 |
93%|ββββββββββ| 9970/10682 [1:24:45<05:49, 2.03it/s]
|
774 |
93%|ββββββββββ| 9971/10682 [1:24:45<05:49, 2.03it/s]
|
775 |
93%|ββββββββββ| 9972/10682 [1:24:46<05:49, 2.03it/s]
|
776 |
93%|ββββββββββ| 9973/10682 [1:24:46<05:48, 2.03it/s]
|
777 |
93%|ββββββββββ| 9974/10682 [1:24:47<05:48, 2.03it/s]
|
778 |
93%|ββββββββββ| 9975/10682 [1:24:47<05:48, 2.03it/s]
|
779 |
|
|
|
780 |
93%|ββββββββββ| 9975/10682 [1:24:47<05:48, 2.03it/s]
|
781 |
93%|ββββββββββ| 9976/10682 [1:24:48<05:48, 2.03it/s]
|
782 |
93%|ββββββββββ| 9977/10682 [1:24:48<05:47, 2.03it/s]
|
783 |
93%|ββββββββββ| 9978/10682 [1:24:49<05:46, 2.03it/s]
|
784 |
93%|ββββββββββ| 9979/10682 [1:24:49<05:46, 2.03it/s]
|
785 |
93%|ββββββββββ| 9980/10682 [1:24:50<05:45, 2.03it/s]
|
786 |
93%|ββββββββββ| 9981/10682 [1:24:50<05:45, 2.03it/s]
|
787 |
93%|ββββββββββ| 9982/10682 [1:24:51<05:44, 2.03it/s]
|
788 |
93%|ββββββββββ| 9983/10682 [1:24:51<05:44, 2.03it/s]
|
789 |
93%|ββββββββββ| 9984/10682 [1:24:52<05:43, 2.03it/s]
|
790 |
93%|ββββββββββ| 9985/10682 [1:24:52<05:43, 2.03it/s]
|
791 |
93%|ββββββββββ| 9986/10682 [1:24:53<05:43, 2.03it/s]
|
792 |
93%|ββββββββββ| 9987/10682 [1:24:53<05:42, 2.03it/s]
|
793 |
94%|ββββββββββ| 9988/10682 [1:24:53<05:41, 2.03it/s]
|
794 |
94%|ββββββββββ| 9989/10682 [1:24:54<05:41, 2.03it/s]
|
795 |
94%|ββββββββββ| 9990/10682 [1:24:54<05:40, 2.03it/s]
|
796 |
94%|ββββββββββ| 9991/10682 [1:24:55<05:39, 2.03it/s]
|
797 |
94%|ββββββββββ| 9992/10682 [1:24:55<05:39, 2.03it/s]
|
798 |
94%|ββββββββββ| 9993/10682 [1:24:56<05:38, 2.03it/s]
|
799 |
94%|ββββββββββ| 9994/10682 [1:24:56<05:38, 2.03it/s]
|
800 |
94%|ββββββββββ| 9995/10682 [1:24:57<05:38, 2.03it/s]
|
801 |
94%|ββββββββββ| 9996/10682 [1:24:57<05:37, 2.03it/s]
|
802 |
94%|ββββββββββ| 9997/10682 [1:24:58<05:37, 2.03it/s]
|
803 |
94%|ββββββββββ| 9998/10682 [1:24:58<05:36, 2.03it/s]
|
804 |
94%|ββββββββββ| 9999/10682 [1:24:59<05:35, 2.03it/s]
|
805 |
94%|ββββββββββ| 10000/10682 [1:24:59<05:35, 2.03it/s]
|
806 |
|
|
|
807 |
94%|ββββββββββ| 10000/10682 [1:24:59<05:35, 2.03it/s]
|
808 |
94%|ββββββββββ| 10001/10682 [1:25:00<05:35, 2.03it/s]
|
809 |
94%|ββββββββββ| 10002/10682 [1:25:00<05:35, 2.03it/s]
|
810 |
94%|ββββββββββ| 10003/10682 [1:25:01<05:34, 2.03it/s]
|
811 |
94%|ββββββββββ| 10004/10682 [1:25:01<05:33, 2.03it/s]
|
812 |
94%|ββββββββββ| 10005/10682 [1:25:02<05:33, 2.03it/s]
|
813 |
94%|ββββββββββ| 10006/10682 [1:25:02<05:32, 2.03it/s]
|
814 |
94%|ββββββββββ| 10007/10682 [1:25:03<05:32, 2.03it/s]
|
815 |
94%|ββββββββββ| 10008/10682 [1:25:03<05:31, 2.03it/s]
|
816 |
94%|ββββββββββ| 10009/10682 [1:25:04<05:31, 2.03it/s]
|
817 |
94%|ββββββββββ| 10010/10682 [1:25:04<05:31, 2.03it/s]
|
818 |
94%|ββββββββββ| 10011/10682 [1:25:05<05:30, 2.03it/s]
|
819 |
94%|ββββββββββ| 10012/10682 [1:25:05<05:30, 2.03it/s]
|
820 |
94%|ββββββββββ| 10013/10682 [1:25:06<05:29, 2.03it/s]
|
821 |
94%|ββββββββββ| 10014/10682 [1:25:06<05:28, 2.03it/s]
|
822 |
94%|ββββββββββ| 10015/10682 [1:25:07<05:28, 2.03it/s]
|
823 |
94%|ββββββββββ| 10016/10682 [1:25:07<05:27, 2.03it/s]
|
824 |
94%|ββββββββββ| 10017/10682 [1:25:08<05:27, 2.03it/s]
|
825 |
94%|ββββββββββ| 10018/10682 [1:25:08<05:27, 2.03it/s]
|
826 |
94%|ββββββββββ| 10019/10682 [1:25:09<05:26, 2.03it/s]
|
827 |
94%|ββββββββββ| 10020/10682 [1:25:09<05:26, 2.03it/s]
|
828 |
94%|ββββββββββ| 10021/10682 [1:25:10<05:25, 2.03it/s]
|
829 |
94%|ββββββββββ| 10022/10682 [1:25:10<05:25, 2.03it/s]
|
830 |
94%|ββββββββββ| 10023/10682 [1:25:11<05:24, 2.03it/s]
|
831 |
94%|ββββββββββ| 10024/10682 [1:25:11<05:24, 2.03it/s]
|
832 |
94%|ββββββββββ| 10025/10682 [1:25:12<05:23, 2.03it/s]
|
833 |
|
|
|
834 |
94%|ββββββββββ| 10025/10682 [1:25:12<05:23, 2.03it/s]
|
835 |
94%|ββββββββββ| 10026/10682 [1:25:12<05:23, 2.03it/s]
|
836 |
94%|ββββββββββ| 10027/10682 [1:25:13<05:23, 2.03it/s]
|
837 |
94%|ββββββββββ| 10028/10682 [1:25:13<05:22, 2.03it/s]
|
838 |
94%|ββββββββββ| 10029/10682 [1:25:14<05:22, 2.03it/s]
|
839 |
94%|ββββββββββ| 10030/10682 [1:25:14<05:21, 2.03it/s]
|
840 |
94%|ββββββββββ| 10031/10682 [1:25:15<05:20, 2.03it/s]
|
841 |
94%|ββββββββββ| 10032/10682 [1:25:15<05:20, 2.03it/s]
|
842 |
94%|ββββββββββ| 10033/10682 [1:25:16<05:19, 2.03it/s]
|
843 |
94%|ββββββββββ| 10034/10682 [1:25:16<05:19, 2.03it/s]
|
844 |
94%|ββββββββββ| 10035/10682 [1:25:17<05:18, 2.03it/s]
|
845 |
94%|ββββββββββ| 10036/10682 [1:25:17<05:18, 2.03it/s]
|
846 |
94%|ββββββββββ| 10037/10682 [1:25:18<05:17, 2.03it/s]
|
847 |
94%|ββββββββββ| 10038/10682 [1:25:18<05:17, 2.03it/s]
|
848 |
94%|ββββββββββ| 10039/10682 [1:25:19<05:16, 2.03it/s]
|
849 |
94%|ββββββββββ| 10040/10682 [1:25:19<05:16, 2.03it/s]
|
850 |
94%|ββββββββββ| 10041/10682 [1:25:20<05:15, 2.03it/s]
|
851 |
94%|ββββββββββ| 10042/10682 [1:25:20<05:15, 2.03it/s]
|
852 |
94%|ββββββββββ| 10043/10682 [1:25:21<05:14, 2.03it/s]
|
853 |
94%|ββββββββββ| 10044/10682 [1:25:21<05:14, 2.03it/s]
|
854 |
94%|ββββββββββ| 10045/10682 [1:25:22<05:13, 2.03it/s]
|
855 |
94%|ββββββββββ| 10046/10682 [1:25:22<05:13, 2.03it/s]
|
856 |
94%|ββββββββββ| 10047/10682 [1:25:23<05:12, 2.03it/s]
|
857 |
94%|ββββββββββ| 10048/10682 [1:25:23<05:12, 2.03it/s]
|
858 |
94%|ββββββββββ| 10049/10682 [1:25:24<05:11, 2.03it/s]
|
859 |
94%|ββββββββββ| 10050/10682 [1:25:24<05:11, 2.03it/s]
|
860 |
{'loss': 2.7813, 'grad_norm': 0.2624559998512268, 'learning_rate': 1.0627021937013704e-05, 'epoch': 13.17}
|
|
|
861 |
94%|ββββββββββ| 10050/10682 [1:25:24<05:11, 2.03it/s]
|
862 |
94%|ββββββββββ| 10051/10682 [1:25:25<05:11, 2.03it/s]
|
863 |
94%|ββββββββββ| 10052/10682 [1:25:25<05:10, 2.03it/s]
|
864 |
94%|ββββββββββ| 10053/10682 [1:25:25<05:09, 2.03it/s]
|
865 |
94%|ββββββββββ| 10054/10682 [1:25:26<05:09, 2.03it/s]
|
866 |
94%|ββββββββββ| 10055/10682 [1:25:26<05:08, 2.03it/s]
|
867 |
94%|ββββββββββ| 10056/10682 [1:25:27<05:08, 2.03it/s]
|
868 |
94%|ββββββββββ| 10057/10682 [1:25:27<05:08, 2.03it/s]
|
869 |
94%|ββββββββββ| 10058/10682 [1:25:28<05:07, 2.03it/s]
|
870 |
94%|ββββββββββ| 10059/10682 [1:25:28<05:07, 2.03it/s]
|
871 |
94%|ββββββββββ| 10060/10682 [1:25:29<05:06, 2.03it/s]
|
872 |
94%|ββββββββββ| 10061/10682 [1:25:29<05:06, 2.03it/s]
|
873 |
94%|ββββββββββ| 10062/10682 [1:25:30<05:05, 2.03it/s]
|
874 |
94%|ββββββββββ| 10063/10682 [1:25:30<05:04, 2.03it/s]
|
875 |
94%|ββββββββββ| 10064/10682 [1:25:31<05:04, 2.03it/s]
|
876 |
94%|ββββββββββ| 10065/10682 [1:25:31<05:03, 2.03it/s]
|
877 |
94%|ββββββββββ| 10066/10682 [1:25:32<05:03, 2.03it/s]
|
878 |
94%|ββββββββββ| 10067/10682 [1:25:32<05:02, 2.03it/s]
|
879 |
94%|ββββββββββ| 10068/10682 [1:25:33<05:02, 2.03it/s]
|
880 |
94%|ββββββββββ| 10069/10682 [1:25:33<05:02, 2.03it/s]
|
881 |
94%|ββββββββββ| 10070/10682 [1:25:34<05:01, 2.03it/s]
|
882 |
94%|ββββββββββ| 10071/10682 [1:25:34<05:00, 2.03it/s]
|
883 |
94%|ββββββββββ| 10072/10682 [1:25:35<05:00, 2.03it/s]
|
884 |
94%|ββββββββββ| 10073/10682 [1:25:35<04:59, 2.03it/s]
|
885 |
94%|ββββββββββ| 10074/10682 [1:25:36<04:59, 2.03it/s]
|
886 |
94%|ββββββββββ| 10075/10682 [1:25:36<04:58, 2.03it/s]
|
887 |
|
|
|
888 |
94%|ββββββββββ| 10075/10682 [1:25:36<04:58, 2.03it/s]
|
889 |
94%|ββββββββββ| 10076/10682 [1:25:37<04:59, 2.03it/s]
|
890 |
94%|ββββββββββ| 10077/10682 [1:25:37<04:58, 2.03it/s]
|
891 |
94%|ββββββββββ| 10078/10682 [1:25:38<04:57, 2.03it/s]
|
892 |
94%|ββββββββββ| 10079/10682 [1:25:38<04:57, 2.03it/s]
|
893 |
94%|ββββββββββ| 10080/10682 [1:25:39<04:56, 2.03it/s]
|
894 |
94%|ββββββββββ| 10081/10682 [1:25:39<04:56, 2.03it/s]
|
895 |
94%|ββββββββββ| 10082/10682 [1:25:40<04:56, 2.03it/s]
|
896 |
94%|ββββββββββ| 10083/10682 [1:25:40<04:55, 2.02it/s]
|
897 |
94%|ββββββββββ| 10084/10682 [1:25:41<04:55, 2.03it/s]
|
898 |
94%|ββββββββββ| 10085/10682 [1:25:41<04:54, 2.03it/s]
|
899 |
94%|ββββββββββ| 10086/10682 [1:25:42<04:54, 2.03it/s]
|
900 |
94%|ββββββββββ| 10087/10682 [1:25:42<04:53, 2.03it/s]
|
901 |
94%|ββββββββββ| 10088/10682 [1:25:43<04:53, 2.03it/s]
|
902 |
94%|ββββββββββ| 10089/10682 [1:25:43<05:19, 1.86it/s]
|
903 |
94%|ββββββββββ| 10090/10682 [1:25:44<05:10, 1.91it/s]
|
904 |
94%|ββββββββββ| 10091/10682 [1:25:44<05:04, 1.94it/s]
|
905 |
94%|ββββββββββ| 10092/10682 [1:25:45<04:59, 1.97it/s]
|
906 |
94%|ββββββββββ| 10093/10682 [1:25:45<04:56, 1.99it/s]
|
907 |
94%|ββββββββββ| 10094/10682 [1:25:46<04:54, 2.00it/s]
|
908 |
95%|ββββββββββ| 10095/10682 [1:25:46<04:52, 2.01it/s]
|
909 |
95%|ββββββββββ| 10096/10682 [1:25:47<04:50, 2.01it/s]
|
910 |
95%|ββββββββββ| 10097/10682 [1:25:47<04:49, 2.02it/s]
|
911 |
95%|ββββββββββ| 10098/10682 [1:25:48<04:48, 2.02it/s]
|
912 |
95%|ββββββββββ| 10099/10682 [1:25:48<04:47, 2.03it/s]
|
913 |
95%|ββββββββββ| 10100/10682 [1:25:49<04:47, 2.03it/s]{'loss': 2.7786, 'grad_norm': 0.26479992270469666, 'learning_rate': 9.01691900753926e-06, 'epoch': 13.24}
|
|
|
914 |
|
915 |
95%|ββββββββββ| 10100/10682 [1:25:49<04:47, 2.03it/s]
|
916 |
95%|ββββββββββ| 10101/10682 [1:25:49<04:46, 2.03it/s]
|
917 |
95%|ββββββββββ| 10102/10682 [1:25:50<04:46, 2.03it/s]
|
918 |
95%|ββββββββββ| 10103/10682 [1:25:50<04:45, 2.03it/s]
|
919 |
95%|ββββββββββ| 10104/10682 [1:25:51<04:45, 2.03it/s]
|
920 |
95%|ββββββββββ| 10105/10682 [1:25:51<04:44, 2.03it/s]
|
921 |
95%|ββββββββββ| 10106/10682 [1:25:52<04:43, 2.03it/s]
|
922 |
95%|ββββββββββ| 10107/10682 [1:25:52<04:43, 2.03it/s]
|
923 |
95%|ββββββββββ| 10108/10682 [1:25:53<04:42, 2.03it/s]
|
924 |
95%|ββββββββββ| 10109/10682 [1:25:53<04:42, 2.03it/s]
|
925 |
95%|ββββββββββ| 10110/10682 [1:25:54<04:41, 2.03it/s]
|
926 |
95%|ββββββββββ| 10111/10682 [1:25:54<04:41, 2.03it/s]
|
927 |
95%|ββββββββββ| 10112/10682 [1:25:55<04:40, 2.03it/s]
|
928 |
95%|ββββββββββ| 10113/10682 [1:25:55<04:40, 2.03it/s]
|
929 |
95%|ββββββββββ| 10114/10682 [1:25:56<04:39, 2.03it/s]
|
930 |
95%|ββββββββββ| 10115/10682 [1:25:56<04:39, 2.03it/s]
|
931 |
95%|ββββββββββ| 10116/10682 [1:25:57<04:38, 2.03it/s]
|
932 |
95%|ββββββββββ| 10117/10682 [1:25:57<04:38, 2.03it/s]
|
933 |
95%|ββββββββββ| 10118/10682 [1:25:58<04:38, 2.03it/s]
|
934 |
95%|ββββββββββ| 10119/10682 [1:25:58<04:37, 2.03it/s]
|
935 |
95%|ββββββββββ| 10120/10682 [1:25:59<04:37, 2.03it/s]
|
936 |
95%|ββββββββββ| 10121/10682 [1:25:59<04:36, 2.03it/s]
|
937 |
95%|ββββββββββ| 10122/10682 [1:26:00<04:36, 2.03it/s]
|
938 |
95%|ββββββββββ| 10123/10682 [1:26:00<04:35, 2.03it/s]
|
939 |
95%|ββββββββββ| 10124/10682 [1:26:01<04:35, 2.03it/s]
|
940 |
95%|ββββββββββ| 10125/10682 [1:26:01<04:34, 2.03it/s]
|
941 |
|
|
|
942 |
95%|ββββββββββ| 10125/10682 [1:26:01<04:34, 2.03it/s]
|
943 |
95%|ββββββββββ| 10126/10682 [1:26:02<04:34, 2.03it/s]
|
944 |
95%|ββββββββββ| 10127/10682 [1:26:02<04:33, 2.03it/s]
|
945 |
95%|ββββββββββ| 10128/10682 [1:26:03<04:32, 2.03it/s]
|
946 |
95%|ββββββββββ| 10129/10682 [1:26:03<04:32, 2.03it/s]
|
947 |
95%|βββββββββοΏ½οΏ½| 10130/10682 [1:26:04<04:31, 2.03it/s]
|
948 |
95%|ββββββββββ| 10131/10682 [1:26:04<04:31, 2.03it/s]
|
949 |
95%|ββββββββββ| 10132/10682 [1:26:05<04:31, 2.03it/s]
|
950 |
95%|ββββββββββ| 10133/10682 [1:26:05<04:30, 2.03it/s]
|
951 |
95%|ββββββββββ| 10134/10682 [1:26:06<04:30, 2.03it/s]
|
952 |
95%|ββββββββββ| 10135/10682 [1:26:06<04:29, 2.03it/s]
|
953 |
95%|ββββββββββ| 10136/10682 [1:26:07<04:29, 2.03it/s]
|
954 |
95%|ββββββββββ| 10137/10682 [1:26:07<04:28, 2.03it/s]
|
955 |
95%|ββββββββββ| 10138/10682 [1:26:08<04:27, 2.03it/s]
|
956 |
95%|ββββββββββ| 10139/10682 [1:26:08<04:27, 2.03it/s]
|
957 |
95%|ββββββββββ| 10140/10682 [1:26:09<04:26, 2.03it/s]
|
958 |
95%|ββββββββββ| 10141/10682 [1:26:09<04:26, 2.03it/s]
|
959 |
95%|ββββββββββ| 10142/10682 [1:26:10<04:26, 2.03it/s]
|
960 |
95%|ββββββββββ| 10143/10682 [1:26:10<04:25, 2.03it/s]
|
961 |
95%|ββββββββββ| 10144/10682 [1:26:10<04:25, 2.03it/s]
|
962 |
95%|ββββββββββ| 10145/10682 [1:26:11<04:24, 2.03it/s]
|
963 |
95%|ββββββββββ| 10146/10682 [1:26:11<04:24, 2.03it/s]
|
964 |
95%|ββββββββββ| 10147/10682 [1:26:12<04:23, 2.03it/s]
|
965 |
95%|ββββββββββ| 10148/10682 [1:26:12<04:22, 2.03it/s]
|
966 |
95%|ββββββββββ| 10149/10682 [1:26:13<04:22, 2.03it/s]
|
967 |
95%|ββββββββββ| 10150/10682 [1:26:13<04:21, 2.03it/s]{'loss': 2.7769, 'grad_norm': 0.26382389664649963, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.3}
|
968 |
|
|
|
969 |
95%|ββββββββββ| 10150/10682 [1:26:13<04:21, 2.03it/s]
|
970 |
95%|ββββββββββ| 10151/10682 [1:26:14<04:21, 2.03it/s]
|
971 |
95%|ββββββββββ| 10152/10682 [1:26:14<04:21, 2.03it/s]
|
972 |
95%|ββββββββββ| 10153/10682 [1:26:15<04:21, 2.03it/s]
|
973 |
95%|ββββββββββ| 10154/10682 [1:26:15<04:20, 2.03it/s]
|
974 |
95%|ββββββββββ| 10155/10682 [1:26:16<04:19, 2.03it/s]
|
975 |
95%|ββββββββββ| 10156/10682 [1:26:16<04:19, 2.03it/s]
|
976 |
95%|ββββββββββ| 10157/10682 [1:26:17<04:18, 2.03it/s]
|
977 |
95%|ββββββββββ| 10158/10682 [1:26:17<04:18, 2.03it/s]
|
978 |
95%|ββββββββββ| 10159/10682 [1:26:18<04:17, 2.03it/s]
|
979 |
95%|ββββββββββ| 10160/10682 [1:26:18<04:17, 2.03it/s]
|
980 |
95%|ββββββββββ| 10161/10682 [1:26:19<04:16, 2.03it/s]
|
981 |
95%|ββββββββββ| 10162/10682 [1:26:19<04:16, 2.03it/s]
|
982 |
95%|ββββββββββ| 10163/10682 [1:26:20<04:15, 2.03it/s]
|
983 |
95%|ββββββββββ| 10164/10682 [1:26:20<04:15, 2.03it/s]
|
984 |
95%|ββββββββββ| 10165/10682 [1:26:21<04:15, 2.03it/s]
|
985 |
95%|ββββββββββ| 10166/10682 [1:26:21<04:14, 2.03it/s]
|
986 |
95%|ββββββββββ| 10167/10682 [1:26:22<04:14, 2.03it/s]
|
987 |
95%|ββββββββββ| 10168/10682 [1:26:22<04:13, 2.03it/s]
|
988 |
95%|ββββββββββ| 10169/10682 [1:26:23<04:12, 2.03it/s]
|
989 |
95%|ββββββββββ| 10170/10682 [1:26:23<04:12, 2.03it/s]
|
990 |
95%|ββββββββββ| 10171/10682 [1:26:24<04:11, 2.03it/s]
|
991 |
95%|ββββββββββ| 10172/10682 [1:26:24<04:11, 2.03it/s]
|
992 |
95%|ββββββββββ| 10173/10682 [1:26:25<04:10, 2.03it/s]
|
993 |
95%|ββββββββββ| 10174/10682 [1:26:25<04:10, 2.03it/s]
|
994 |
95%|ββββββββββ| 10175/10682 [1:26:26<04:09, 2.03it/s]{'loss': 2.7838, 'grad_norm': 0.26716604828834534, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.34}
|
|
|
995 |
|
996 |
95%|ββββββββββ| 10175/10682 [1:26:26<04:09, 2.03it/s]
|
997 |
95%|ββββββββββ| 10176/10682 [1:26:26<04:09, 2.03it/s]
|
998 |
95%|ββββββββββ| 10177/10682 [1:26:27<04:08, 2.03it/s]
|
999 |
95%|ββββββββββ| 10178/10682 [1:26:27<04:08, 2.03it/s]
|
1000 |
95%|ββββββββββ| 10179/10682 [1:26:28<04:07, 2.03it/s]
|
1001 |
95%|ββββββββββ| 10180/10682 [1:26:28<04:07, 2.03it/s]
|
1002 |
95%|ββββββββββ| 10181/10682 [1:26:29<04:06, 2.03it/s]
|
1003 |
95%|ββββββββββ| 10182/10682 [1:26:29<04:06, 2.03it/s]
|
1004 |
95%|ββββββββββ| 10183/10682 [1:26:30<04:06, 2.03it/s]
|
1005 |
95%|ββββββββββ| 10184/10682 [1:26:30<04:05, 2.03it/s]
|
1006 |
95%|ββββββββββ| 10185/10682 [1:26:31<04:04, 2.03it/s]
|
1007 |
95%|ββββββββββ| 10186/10682 [1:26:31<04:04, 2.03it/s]
|
1008 |
95%|ββββββββββ| 10187/10682 [1:26:32<04:03, 2.03it/s]
|
1009 |
95%|ββββββββββ| 10188/10682 [1:26:32<04:03, 2.03it/s]
|
1010 |
95%|ββββββββββ| 10189/10682 [1:26:33<04:02, 2.03it/s]
|
1011 |
95%|ββββββββββ| 10190/10682 [1:26:33<04:02, 2.03it/s]
|
1012 |
95%|ββββββββββ| 10191/10682 [1:26:34<04:02, 2.03it/s]
|
1013 |
95%|ββββββββββ| 10192/10682 [1:26:34<04:01, 2.03it/s]
|
1014 |
95%|ββββββββββ| 10193/10682 [1:26:35<04:01, 2.03it/s]
|
1015 |
95%|ββββββββββ| 10194/10682 [1:26:35<04:00, 2.03it/s]
|
1016 |
95%|ββββββββββ| 10195/10682 [1:26:36<04:00, 2.03it/s]
|
1017 |
95%|ββββββββββ| 10196/10682 [1:26:36<03:59, 2.03it/s]
|
1018 |
95%|ββββββββββ| 10197/10682 [1:26:37<03:59, 2.03it/s]
|
1019 |
95%|ββββββββββ| 10198/10682 [1:26:37<03:58, 2.03it/s]
|
1020 |
95%|ββββββββββ| 10199/10682 [1:26:38<03:58, 2.03it/s]
|
1021 |
95%|ββββββββββ| 10200/10682 [1:26:38<03:57, 2.03it/s]
|
1022 |
|
|
|
1023 |
95%|ββββββββββ| 10200/10682 [1:26:38<03:57, 2.03it/s]
|
1024 |
95%|ββββββββββ| 10201/10682 [1:26:39<03:57, 2.03it/s]
|
1025 |
96%|ββββββββββ| 10202/10682 [1:26:39<03:56, 2.03it/s]
|
1026 |
96%|ββββββββββ| 10203/10682 [1:26:40<03:56, 2.03it/s]
|
1027 |
96%|ββββββββββ| 10204/10682 [1:26:40<03:55, 2.03it/s]
|
1028 |
96%|ββββββββββ| 10205/10682 [1:26:41<03:55, 2.03it/s]
|
1029 |
96%|ββββββββββ| 10206/10682 [1:26:41<03:54, 2.03it/s]
|
1030 |
96%|ββββββββββ| 10207/10682 [1:26:42<03:54, 2.03it/s]
|
1031 |
96%|ββββββββββ| 10208/10682 [1:26:42<03:53, 2.03it/s]
|
1032 |
96%|ββββββββββ| 10209/10682 [1:26:43<03:53, 2.03it/s]
|
1033 |
96%|ββββββββββ| 10210/10682 [1:26:43<03:52, 2.03it/s]
|
1034 |
96%|ββββββββββ| 10211/10682 [1:26:44<04:11, 1.87it/s]
|
1035 |
96%|ββββββββββ| 10212/10682 [1:26:44<04:05, 1.91it/s]
|
1036 |
96%|ββββββββββ| 10213/10682 [1:26:45<04:00, 1.95it/s]
|
1037 |
96%|ββββββββββ| 10214/10682 [1:26:45<03:57, 1.97it/s]
|
1038 |
96%|ββββββββββ| 10215/10682 [1:26:46<03:54, 1.99it/s]
|
1039 |
96%|ββββββββββ| 10216/10682 [1:26:46<03:53, 2.00it/s]
|
1040 |
96%|ββββββββββ| 10217/10682 [1:26:47<03:51, 2.01it/s]
|
1041 |
96%|ββββββββββ| 10218/10682 [1:26:47<03:50, 2.02it/s]
|
1042 |
96%|ββββββββββ| 10219/10682 [1:26:48<03:49, 2.02it/s]
|
1043 |
96%|ββββββββββ| 10220/10682 [1:26:48<03:48, 2.02it/s]
|
1044 |
96%|ββββββββββ| 10221/10682 [1:26:49<03:47, 2.03it/s]
|
1045 |
96%|ββββββββββ| 10222/10682 [1:26:49<03:47, 2.03it/s]
|
1046 |
96%|ββββββββββ| 10223/10682 [1:26:50<03:46, 2.03it/s]
|
1047 |
96%|ββββββββββ| 10224/10682 [1:26:50<03:46, 2.03it/s]
|
1048 |
96%|ββββββββββ| 10225/10682 [1:26:51<03:45, 2.03it/s]
|
1049 |
|
|
|
1050 |
96%|ββββββββββ| 10225/10682 [1:26:51<03:45, 2.03it/s]
|
1051 |
96%|ββββββββββ| 10226/10682 [1:26:51<03:45, 2.02it/s]
|
1052 |
96%|ββββββββββ| 10227/10682 [1:26:52<03:44, 2.03it/s]
|
1053 |
96%|ββββββββββ| 10228/10682 [1:26:52<03:43, 2.03it/s]
|
1054 |
96%|ββββββββββ| 10229/10682 [1:26:53<03:43, 2.03it/s]
|
1055 |
96%|ββββββββββ| 10230/10682 [1:26:53<03:42, 2.03it/s]
|
1056 |
96%|ββββββββββ| 10231/10682 [1:26:54<03:42, 2.03it/s]
|
1057 |
96%|ββββββββββ| 10232/10682 [1:26:54<03:41, 2.03it/s]
|
1058 |
96%|ββββββββββ| 10233/10682 [1:26:55<03:41, 2.03it/s]
|
1059 |
96%|ββββββββββ| 10234/10682 [1:26:55<03:40, 2.03it/s]
|
1060 |
96%|ββββββββββ| 10235/10682 [1:26:55<03:40, 2.03it/s]
|
1061 |
96%|ββββββββββ| 10236/10682 [1:26:56<03:39, 2.03it/s]
|
1062 |
96%|ββββββββββ| 10237/10682 [1:26:56<03:39, 2.03it/s]
|
1063 |
96%|ββββββββββ| 10238/10682 [1:26:57<03:38, 2.03it/s]
|
1064 |
96%|ββββββββββ| 10239/10682 [1:26:57<03:37, 2.03it/s]
|
1065 |
96%|ββββββββββ| 10240/10682 [1:26:58<03:37, 2.03it/s]
|
1066 |
96%|ββββββββββ| 10241/10682 [1:26:58<03:37, 2.03it/s]
|
1067 |
96%|ββββββββββ| 10242/10682 [1:26:59<03:36, 2.03it/s]
|
1068 |
96%|ββββββββββ| 10243/10682 [1:26:59<03:36, 2.03it/s]
|
1069 |
96%|ββββββββββ| 10244/10682 [1:27:00<03:35, 2.03it/s]
|
1070 |
96%|ββββββββββ| 10245/10682 [1:27:00<03:35, 2.03it/s]
|
1071 |
96%|ββββββββββ| 10246/10682 [1:27:01<03:34, 2.03it/s]
|
1072 |
96%|ββββββββββ| 10247/10682 [1:27:01<03:34, 2.03it/s]
|
1073 |
96%|ββββββββββ| 10248/10682 [1:27:02<03:33, 2.03it/s]
|
1074 |
96%|ββββββββββ| 10249/10682 [1:27:02<03:33, 2.03it/s]
|
1075 |
96%|ββββββββββ| 10250/10682 [1:27:03<03:32, 2.03it/s]{'loss': 2.7947, 'grad_norm': 0.26169490814208984, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
|
|
1076 |
|
1077 |
96%|ββββββββββ| 10250/10682 [1:27:03<03:32, 2.03it/s]
|
1078 |
96%|ββββββββββ| 10251/10682 [1:27:03<03:32, 2.03it/s]
|
1079 |
96%|ββββββββββ| 10252/10682 [1:27:04<03:31, 2.03it/s]
|
1080 |
96%|ββββββββββ| 10253/10682 [1:27:04<03:31, 2.03it/s]
|
1081 |
96%|ββββββββββ| 10254/10682 [1:27:05<03:31, 2.03it/s]
|
1082 |
96%|ββββββββββ| 10255/10682 [1:27:05<03:30, 2.03it/s]
|
1083 |
96%|ββββββββββ| 10256/10682 [1:27:06<03:30, 2.03it/s]
|
1084 |
96%|ββββββββββ| 10257/10682 [1:27:06<03:29, 2.03it/s]
|
1085 |
96%|ββββββββββ| 10258/10682 [1:27:07<03:28, 2.03it/s]
|
1086 |
96%|ββββββββββ| 10259/10682 [1:27:07<03:28, 2.03it/s]
|
1087 |
96%|ββββββββββ| 10260/10682 [1:27:08<03:28, 2.03it/s]
|
1088 |
96%|ββββββββββ| 10261/10682 [1:27:08<03:27, 2.03it/s]
|
1089 |
96%|ββββββββββ| 10262/10682 [1:27:09<03:26, 2.03it/s]
|
1090 |
96%|ββββββββββ| 10263/10682 [1:27:09<03:26, 2.03it/s]
|
1091 |
96%|ββββββββββ| 10264/10682 [1:27:10<03:26, 2.03it/s]
|
1092 |
96%|ββββββββββ| 10265/10682 [1:27:10<03:25, 2.03it/s]
|
1093 |
96%|ββββββββββ| 10266/10682 [1:27:11<03:24, 2.03it/s]
|
1094 |
96%|ββββββββββ| 10267/10682 [1:27:11<03:24, 2.03it/s]
|
1095 |
96%|ββββββββββ| 10268/10682 [1:27:12<03:24, 2.03it/s]
|
1096 |
96%|ββββββββββ| 10269/10682 [1:27:12<03:23, 2.03it/s]
|
1097 |
96%|ββββββββββ| 10270/10682 [1:27:13<03:23, 2.03it/s]
|
1098 |
96%|ββββββββββ| 10271/10682 [1:27:13<03:22, 2.03it/s]
|
1099 |
96%|ββββββββββ| 10272/10682 [1:27:14<03:22, 2.03it/s]
|
1100 |
96%|ββββββββββ| 10273/10682 [1:27:14<03:21, 2.03it/s]
|
1101 |
96%|ββββββββββ| 10274/10682 [1:27:15<03:20, 2.03it/s]
|
1102 |
96%|ββββββββββ| 10275/10682 [1:27:15<03:20, 2.03it/s]{'loss': 2.7872, 'grad_norm': 0.2615707516670227, 'learning_rate': 4.416419388921844e-06, 'epoch': 13.47}
|
1103 |
|
|
|
1104 |
96%|ββββββββββ| 10275/10682 [1:27:15<03:20, 2.03it/s]
|
1105 |
96%|ββββββββββ| 10276/10682 [1:27:16<03:20, 2.03it/s]
|
1106 |
96%|ββββββββββ| 10277/10682 [1:27:16<03:19, 2.03it/s]
|
1107 |
96%|ββββββββββ| 10278/10682 [1:27:17<03:19, 2.03it/s]
|
1108 |
96%|ββββββββββ| 10279/10682 [1:27:17<03:18, 2.03it/s]
|
1109 |
96%|ββββββββββ| 10280/10682 [1:27:18<03:18, 2.03it/s]
|
1110 |
96%|ββββββββββ| 10281/10682 [1:27:18<03:17, 2.03it/s]
|
1111 |
96%|ββββββββββ| 10282/10682 [1:27:19<03:17, 2.03it/s]
|
1112 |
96%|ββββββββββ| 10283/10682 [1:27:19<03:16, 2.03it/s]
|
1113 |
96%|ββββββββββ| 10284/10682 [1:27:20<03:16, 2.03it/s]
|
1114 |
96%|ββββββββββ| 10285/10682 [1:27:20<03:15, 2.03it/s]
|
1115 |
96%|ββββββββββ| 10286/10682 [1:27:21<03:15, 2.03it/s]
|
1116 |
96%|ββββββββββ| 10287/10682 [1:27:21<03:14, 2.03it/s]
|
1117 |
96%|ββββββββββ| 10288/10682 [1:27:22<03:14, 2.03it/s]
|
1118 |
96%|ββββββββββ| 10289/10682 [1:27:22<03:13, 2.03it/s]
|
1119 |
96%|ββββββββββ| 10290/10682 [1:27:23<03:13, 2.03it/s]
|
1120 |
96%|ββββββββββ| 10291/10682 [1:27:23<03:12, 2.03it/s]
|
1121 |
96%|ββββββββββ| 10292/10682 [1:27:24<03:12, 2.03it/s]
|
1122 |
96%|ββββββββββ| 10293/10682 [1:27:24<03:11, 2.03it/s]
|
1123 |
96%|ββββββββββ| 10294/10682 [1:27:25<03:10, 2.03it/s]
|
1124 |
96%|ββββββββββ| 10295/10682 [1:27:25<03:10, 2.03it/s]
|
1125 |
96%|ββββββββββ| 10296/10682 [1:27:26<03:10, 2.03it/s]
|
1126 |
96%|ββββββββββ| 10297/10682 [1:27:26<03:09, 2.03it/s]
|
1127 |
96%|ββββββββββ| 10298/10682 [1:27:27<03:09, 2.03it/s]
|
1128 |
96%|ββββββββββ| 10299/10682 [1:27:27<03:08, 2.03it/s]
|
1129 |
96%|ββββββββββ| 10300/10682 [1:27:28<03:08, 2.03it/s]{'loss': 2.7753, 'grad_norm': 0.2637779116630554, 'learning_rate': 3.891208300917604e-06, 'epoch': 13.5}
|
|
|
1130 |
|
1131 |
96%|ββββββββββ| 10300/10682 [1:27:28<03:08, 2.03it/s]
|
1132 |
96%|ββββββββββ| 10301/10682 [1:27:28<03:07, 2.03it/s]
|
1133 |
96%|ββββββββββ| 10302/10682 [1:27:29<03:07, 2.03it/s]
|
1134 |
96%|ββββββββββ| 10303/10682 [1:27:29<03:06, 2.03it/s]
|
1135 |
96%|ββββββββββ| 10304/10682 [1:27:29<03:06, 2.03it/s]
|
1136 |
96%|ββββββββββ| 10305/10682 [1:27:30<03:05, 2.03it/s]
|
1137 |
96%|ββββββββββ| 10306/10682 [1:27:30<03:05, 2.03it/s]
|
1138 |
96%|ββββββββββ| 10307/10682 [1:27:31<03:04, 2.03it/s]
|
1139 |
96%|ββββββββββ| 10308/10682 [1:27:31<03:04, 2.03it/s]
|
1140 |
97%|ββββββββββ| 10309/10682 [1:27:32<03:03, 2.03it/s]
|
1141 |
97%|βββββββββοΏ½οΏ½| 10310/10682 [1:27:32<03:03, 2.03it/s]
|
1142 |
97%|ββββββββββ| 10311/10682 [1:27:33<03:02, 2.03it/s]
|
1143 |
97%|ββββββββββ| 10312/10682 [1:27:33<03:02, 2.03it/s]
|
1144 |
97%|ββββββββββ| 10313/10682 [1:27:34<03:01, 2.03it/s]
|
1145 |
97%|ββββββββββ| 10314/10682 [1:27:34<03:01, 2.03it/s]
|
1146 |
97%|ββββββββββ| 10315/10682 [1:27:35<03:01, 2.03it/s]
|
1147 |
97%|ββββββββββ| 10316/10682 [1:27:35<03:00, 2.03it/s]
|
1148 |
97%|ββββββββββ| 10317/10682 [1:27:36<03:00, 2.03it/s]
|
1149 |
97%|ββββββββββ| 10318/10682 [1:27:36<02:59, 2.03it/s]
|
1150 |
97%|ββββββββββ| 10319/10682 [1:27:37<02:59, 2.02it/s]
|
1151 |
97%|ββββββββββ| 10320/10682 [1:27:37<02:58, 2.03it/s]
|
1152 |
97%|ββββββββββ| 10321/10682 [1:27:38<02:58, 2.03it/s]
|
1153 |
97%|ββββββββββ| 10322/10682 [1:27:38<02:57, 2.03it/s]
|
1154 |
97%|ββββββββββ| 10323/10682 [1:27:39<02:56, 2.03it/s]
|
1155 |
97%|ββββββββββ| 10324/10682 [1:27:39<02:56, 2.03it/s]
|
1156 |
97%|ββββββββββ| 10325/10682 [1:27:40<02:55, 2.03it/s]
|
1157 |
|
|
|
1158 |
97%|ββββββββββ| 10325/10682 [1:27:40<02:55, 2.03it/s]
|
1159 |
97%|ββββββββββ| 10326/10682 [1:27:40<02:55, 2.03it/s]
|
1160 |
97%|ββββββββββ| 10327/10682 [1:27:41<02:55, 2.03it/s]
|
1161 |
97%|ββββββββββ| 10328/10682 [1:27:41<02:54, 2.03it/s]
|
1162 |
97%|ββββββββββ| 10329/10682 [1:27:42<02:54, 2.03it/s]
|
1163 |
97%|ββββββββββ| 10330/10682 [1:27:42<02:53, 2.03it/s]
|
1164 |
97%|ββββββββββ| 10331/10682 [1:27:43<02:52, 2.03it/s]
|
1165 |
97%|ββββββββββ| 10332/10682 [1:27:43<02:52, 2.03it/s]
|
1166 |
97%|ββββββββββ| 10333/10682 [1:27:44<02:51, 2.03it/s]
|
1167 |
97%|ββββββββββ| 10334/10682 [1:27:44<02:51, 2.03it/s]
|
1168 |
97%|ββββββββββ| 10335/10682 [1:27:45<02:50, 2.03it/s]
|
1169 |
97%|ββββββββββ| 10336/10682 [1:27:45<02:50, 2.03it/s]
|
1170 |
97%|ββββββββββ| 10337/10682 [1:27:46<02:50, 2.03it/s]
|
1171 |
97%|ββββββββββ| 10338/10682 [1:27:46<02:49, 2.03it/s]
|
1172 |
97%|ββββββββββ| 10339/10682 [1:27:47<02:49, 2.03it/s]
|
1173 |
97%|ββββββββββ| 10340/10682 [1:27:47<02:48, 2.03it/s]
|
1174 |
97%|ββββββββββ| 10341/10682 [1:27:48<02:47, 2.03it/s]
|
1175 |
97%|ββββββββββ| 10342/10682 [1:27:48<02:47, 2.03it/s]
|
1176 |
97%|ββββββββββ| 10343/10682 [1:27:49<02:46, 2.03it/s]
|
1177 |
97%|ββββββββββ| 10344/10682 [1:27:49<02:46, 2.03it/s]
|
1178 |
97%|ββββββββββ| 10345/10682 [1:27:50<02:45, 2.03it/s]
|
1179 |
97%|ββββββββββ| 10346/10682 [1:27:50<02:45, 2.03it/s]
|
1180 |
97%|ββββββββββ| 10347/10682 [1:27:51<02:45, 2.03it/s]
|
1181 |
97%|ββββββββββ| 10348/10682 [1:27:51<02:44, 2.03it/s]
|
1182 |
97%|ββββββββββ| 10349/10682 [1:27:52<02:44, 2.03it/s]
|
1183 |
97%|ββββββββββ| 10350/10682 [1:27:52<02:43, 2.03it/s]{'loss': 2.786, 'grad_norm': 0.2615926265716553, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
|
|
1184 |
|
1185 |
97%|ββββββββββ| 10350/10682 [1:27:52<02:43, 2.03it/s]
|
1186 |
97%|ββββββββββ| 10351/10682 [1:27:53<02:43, 2.02it/s]
|
1187 |
97%|ββββββββββ| 10352/10682 [1:27:53<02:43, 2.02it/s]
|
1188 |
97%|ββββββββββ| 10353/10682 [1:27:54<02:42, 2.02it/s]
|
1189 |
97%|ββββββββββ| 10354/10682 [1:27:54<02:41, 2.03it/s]
|
1190 |
97%|ββββββββββ| 10355/10682 [1:27:55<02:41, 2.03it/s]
|
1191 |
97%|ββββββββββ| 10356/10682 [1:27:55<02:40, 2.03it/s]
|
1192 |
97%|ββββββββββ| 10357/10682 [1:27:56<02:40, 2.03it/s]
|
1193 |
97%|ββββββββββ| 10358/10682 [1:27:56<02:39, 2.03it/s]
|
1194 |
97%|ββββββββββ| 10359/10682 [1:27:57<02:39, 2.03it/s]
|
1195 |
97%|ββββββββββ| 10360/10682 [1:27:57<02:38, 2.03it/s]
|
1196 |
97%|ββββββββββ| 10361/10682 [1:27:58<02:38, 2.03it/s]
|
1197 |
97%|ββββββββββ| 10362/10682 [1:27:58<02:37, 2.03it/s]
|
1198 |
97%|ββββββββββ| 10363/10682 [1:27:59<02:37, 2.03it/s]
|
1199 |
97%|ββββββββββ| 10364/10682 [1:27:59<02:36, 2.03it/s]
|
1200 |
97%|ββββββββββ| 10365/10682 [1:28:00<02:36, 2.03it/s]
|
1201 |
97%|ββββββββββ| 10366/10682 [1:28:00<02:35, 2.03it/s]
|
1202 |
97%|ββββββββββ| 10367/10682 [1:28:01<02:35, 2.03it/s]
|
1203 |
97%|ββββββββββ| 10368/10682 [1:28:01<02:34, 2.03it/s]
|
1204 |
97%|ββββββββββ| 10369/10682 [1:28:02<02:34, 2.03it/s]
|
1205 |
97%|ββββββββββ| 10370/10682 [1:28:02<02:33, 2.03it/s]
|
1206 |
97%|ββββββββββ| 10371/10682 [1:28:03<02:33, 2.03it/s]
|
1207 |
97%|ββββββββββ| 10372/10682 [1:28:03<02:32, 2.03it/s]
|
1208 |
97%|ββββββββββ| 10373/10682 [1:28:04<02:32, 2.03it/s]
|
1209 |
97%|ββββββββββ| 10374/10682 [1:28:04<02:31, 2.03it/s]
|
1210 |
97%|ββββββββββ| 10375/10682 [1:28:04<02:31, 2.03it/s]{'loss': 2.7851, 'grad_norm': 0.26228195428848267, 'learning_rate': 2.5143995351817882e-06, 'epoch': 13.6}
|
|
|
1211 |
|
1212 |
97%|ββββββββββ| 10375/10682 [1:28:04<02:31, 2.03it/s]
|
1213 |
97%|ββββββββββ| 10376/10682 [1:28:05<02:31, 2.02it/s]
|
1214 |
97%|ββββββββββ| 10377/10682 [1:28:05<02:30, 2.02it/s]
|
1215 |
97%|ββββββββββ| 10378/10682 [1:28:06<02:30, 2.02it/s]
|
1216 |
97%|ββββββββββ| 10379/10682 [1:28:06<02:29, 2.03it/s]
|
1217 |
97%|ββββββββββ| 10380/10682 [1:28:07<02:29, 2.03it/s]
|
1218 |
97%|ββββββββββ| 10381/10682 [1:28:07<02:28, 2.03it/s]
|
1219 |
97%|ββββββββββ| 10382/10682 [1:28:08<02:27, 2.03it/s]
|
1220 |
97%|ββββββββββ| 10383/10682 [1:28:08<02:27, 2.03it/s]
|
1221 |
97%|ββββββββββ| 10384/10682 [1:28:09<02:26, 2.03it/s]
|
1222 |
97%|ββββββββββ| 10385/10682 [1:28:09<02:26, 2.03it/s]
|
1223 |
97%|ββββββββββ| 10386/10682 [1:28:10<02:25, 2.03it/s]
|
1224 |
97%|ββββββββββ| 10387/10682 [1:28:10<02:25, 2.03it/s]
|
1225 |
97%|ββββββββββ| 10388/10682 [1:28:11<02:24, 2.03it/s]
|
1226 |
97%|ββββββββββ| 10389/10682 [1:28:11<02:24, 2.03it/s]
|
1227 |
97%|ββββββββββ| 10390/10682 [1:28:12<02:23, 2.03it/s]
|
1228 |
97%|ββββββββββ| 10391/10682 [1:28:12<02:23, 2.03it/s]
|
1229 |
97%|ββββββββββ| 10392/10682 [1:28:13<02:23, 2.03it/s]
|
1230 |
97%|ββββββββββ| 10393/10682 [1:28:13<02:22, 2.03it/s]
|
1231 |
97%|ββββββββββ| 10394/10682 [1:28:14<02:21, 2.03it/s]
|
1232 |
97%|ββββββββββ| 10395/10682 [1:28:14<02:21, 2.03it/s]
|
1233 |
97%|ββββββββββ| 10396/10682 [1:28:15<02:20, 2.03it/s]
|
1234 |
97%|ββββββββββ| 10397/10682 [1:28:15<02:20, 2.03it/s]
|
1235 |
97%|ββββββββββ| 10398/10682 [1:28:16<02:19, 2.03it/s]
|
1236 |
97%|ββββββββββ| 10399/10682 [1:28:16<02:19, 2.03it/s]
|
1237 |
97%|ββββββββββ| 10400/10682 [1:28:17<02:18, 2.03it/s]
|
1238 |
|
|
|
1239 |
97%|ββββββββββ| 10400/10682 [1:28:17<02:18, 2.03it/s]
|
1240 |
97%|ββββββββββ| 10401/10682 [1:28:17<02:18, 2.03it/s]
|
1241 |
97%|ββββββββββ| 10402/10682 [1:28:18<02:18, 2.03it/s]
|
1242 |
97%|ββββββββββ| 10403/10682 [1:28:18<02:17, 2.03it/s]
|
1243 |
97%|ββββββββββ| 10404/10682 [1:28:19<02:17, 2.03it/s]
|
1244 |
97%|ββββββββββ| 10405/10682 [1:28:19<02:16, 2.03it/s]
|
1245 |
97%|ββββββββββ| 10406/10682 [1:28:20<02:15, 2.03it/s]
|
1246 |
97%|ββββββββββ| 10407/10682 [1:28:20<02:15, 2.03it/s]
|
1247 |
97%|ββββββββββ| 10408/10682 [1:28:21<02:14, 2.03it/s]
|
1248 |
97%|ββββββββββ| 10409/10682 [1:28:21<02:14, 2.03it/s]
|
1249 |
97%|ββββββββββ| 10410/10682 [1:28:22<02:14, 2.03it/s]
|
1250 |
97%|ββββββββββ| 10411/10682 [1:28:22<02:13, 2.03it/s]
|
1251 |
97%|ββββββββββ| 10412/10682 [1:28:23<02:13, 2.03it/s]
|
1252 |
97%|ββββββββββ| 10413/10682 [1:28:23<02:12, 2.03it/s]
|
1253 |
97%|ββββββββββ| 10414/10682 [1:28:24<02:12, 2.03it/s]
|
1254 |
98%|ββββββββββ| 10415/10682 [1:28:24<02:11, 2.03it/s]
|
1255 |
98%|ββββββββββ| 10416/10682 [1:28:25<02:11, 2.02it/s]
|
1256 |
98%|ββββββββββ| 10417/10682 [1:28:25<02:10, 2.03it/s]
|
1257 |
98%|ββββββββββ| 10418/10682 [1:28:26<02:10, 2.03it/s]
|
1258 |
98%|ββββββββββ| 10419/10682 [1:28:26<02:09, 2.03it/s]
|
1259 |
98%|ββββββββββ| 10420/10682 [1:28:27<02:09, 2.03it/s]
|
1260 |
98%|ββββββββββ| 10421/10682 [1:28:27<02:08, 2.03it/s]
|
1261 |
98%|ββββββββββ| 10422/10682 [1:28:28<02:08, 2.03it/s]
|
1262 |
98%|ββββββββββ| 10423/10682 [1:28:28<02:07, 2.03it/s]
|
1263 |
98%|ββββββββββ| 10424/10682 [1:28:29<02:07, 2.03it/s]
|
1264 |
98%|ββββββββββ| 10425/10682 [1:28:29<02:06, 2.03it/s]
|
1265 |
|
|
|
1266 |
98%|ββββββββββ| 10425/10682 [1:28:29<02:06, 2.03it/s]
|
1267 |
98%|ββββββββββ| 10426/10682 [1:28:30<02:06, 2.03it/s]
|
1268 |
98%|ββββββββββ| 10427/10682 [1:28:30<02:05, 2.03it/s]
|
1269 |
98%|ββββββββββ| 10428/10682 [1:28:31<02:05, 2.03it/s]
|
1270 |
98%|βββββββοΏ½οΏ½οΏ½ββ| 10429/10682 [1:28:31<02:04, 2.03it/s]
|
1271 |
98%|ββββββββββ| 10430/10682 [1:28:32<02:04, 2.03it/s]
|
1272 |
98%|ββββββββββ| 10431/10682 [1:28:32<02:03, 2.03it/s]
|
1273 |
98%|ββββββββββ| 10432/10682 [1:28:33<02:03, 2.03it/s]
|
1274 |
98%|ββββββββββ| 10433/10682 [1:28:33<02:02, 2.03it/s]
|
1275 |
98%|ββββββββββ| 10434/10682 [1:28:34<02:02, 2.03it/s]
|
1276 |
98%|ββββββββββ| 10435/10682 [1:28:34<02:01, 2.03it/s]
|
1277 |
98%|ββββββββββ| 10436/10682 [1:28:35<02:01, 2.03it/s]
|
1278 |
98%|ββββββββββ| 10437/10682 [1:28:35<02:00, 2.03it/s]
|
1279 |
98%|ββββββββββ| 10438/10682 [1:28:36<02:00, 2.03it/s]
|
1280 |
98%|ββββββββββ| 10439/10682 [1:28:36<01:59, 2.03it/s]
|
1281 |
98%|ββββββββββ| 10440/10682 [1:28:37<01:59, 2.03it/s]
|
1282 |
98%|ββββββββββ| 10441/10682 [1:28:37<01:58, 2.03it/s]
|
1283 |
98%|ββββββββββ| 10442/10682 [1:28:38<01:58, 2.03it/s]
|
1284 |
98%|ββββββββββ| 10443/10682 [1:28:38<01:57, 2.03it/s]
|
1285 |
98%|ββββββββββ| 10444/10682 [1:28:38<01:57, 2.03it/s]
|
1286 |
98%|ββββββββββ| 10445/10682 [1:28:39<01:56, 2.03it/s]
|
1287 |
98%|ββββββββββ| 10446/10682 [1:28:39<01:56, 2.03it/s]
|
1288 |
98%|ββββββββββ| 10447/10682 [1:28:40<01:55, 2.03it/s]
|
1289 |
98%|ββββββββββ| 10448/10682 [1:28:40<01:55, 2.03it/s]
|
1290 |
98%|ββββββββββ| 10449/10682 [1:28:41<01:54, 2.03it/s]
|
1291 |
98%|ββββββββββ| 10450/10682 [1:28:41<01:54, 2.03it/s]
|
1292 |
|
|
|
1293 |
98%|ββββββββββ| 10450/10682 [1:28:41<01:54, 2.03it/s]
|
1294 |
98%|ββββββββββ| 10451/10682 [1:28:42<01:53, 2.03it/s]
|
1295 |
98%|ββββββββββ| 10452/10682 [1:28:42<01:53, 2.03it/s]
|
1296 |
98%|ββββββββββ| 10453/10682 [1:28:43<01:52, 2.03it/s]
|
1297 |
98%|ββββββββββ| 10454/10682 [1:28:43<01:52, 2.03it/s]
|
1298 |
98%|ββββββββββ| 10455/10682 [1:28:44<01:51, 2.03it/s]
|
1299 |
98%|ββββββββββ| 10456/10682 [1:28:44<01:51, 2.03it/s]
|
1300 |
98%|ββββββββββ| 10457/10682 [1:28:45<01:50, 2.03it/s]
|
1301 |
98%|ββββββββββ| 10458/10682 [1:28:45<01:50, 2.03it/s]
|
1302 |
98%|ββββββββββ| 10459/10682 [1:28:46<01:49, 2.03it/s]
|
1303 |
98%|ββββββββββ| 10460/10682 [1:28:46<01:49, 2.03it/s]
|
1304 |
98%|ββββββββββ| 10461/10682 [1:28:47<01:48, 2.03it/s]
|
1305 |
98%|ββββββββββ| 10462/10682 [1:28:47<01:48, 2.03it/s]
|
1306 |
98%|ββββββββββ| 10463/10682 [1:28:48<01:47, 2.03it/s]
|
1307 |
98%|ββββββββββ| 10464/10682 [1:28:48<01:47, 2.03it/s]
|
1308 |
98%|ββββββββββ| 10465/10682 [1:28:49<01:47, 2.03it/s]
|
1309 |
98%|ββββββββββ| 10466/10682 [1:28:49<01:46, 2.03it/s]
|
1310 |
98%|ββββββββββ| 10467/10682 [1:28:50<01:46, 2.03it/s]
|
1311 |
98%|ββββββββββ| 10468/10682 [1:28:50<01:45, 2.03it/s]
|
1312 |
98%|ββββββββββ| 10469/10682 [1:28:51<01:44, 2.03it/s]
|
1313 |
98%|ββββββββββ| 10470/10682 [1:28:51<01:44, 2.03it/s]
|
1314 |
98%|ββββββββββ| 10471/10682 [1:28:52<01:43, 2.03it/s]
|
1315 |
98%|ββββββββββ| 10472/10682 [1:28:52<01:43, 2.03it/s]
|
1316 |
98%|ββββββββββ| 10473/10682 [1:28:53<01:42, 2.03it/s]
|
1317 |
98%|ββββββββββ| 10474/10682 [1:28:53<01:42, 2.03it/s]
|
1318 |
98%|ββββββββββ| 10475/10682 [1:28:54<01:42, 2.03it/s]{'loss': 2.7933, 'grad_norm': 0.26085323095321655, 'learning_rate': 1.143659993153079e-06, 'epoch': 13.73}
|
|
|
1319 |
|
1320 |
98%|ββββββββββ| 10475/10682 [1:28:54<01:42, 2.03it/s]
|
1321 |
98%|ββββββββββ| 10476/10682 [1:28:54<01:41, 2.03it/s]
|
1322 |
98%|ββββββββββ| 10477/10682 [1:28:55<01:41, 2.03it/s]
|
1323 |
98%|ββββββββββ| 10478/10682 [1:28:55<01:40, 2.03it/s]
|
1324 |
98%|ββββββββββ| 10479/10682 [1:28:56<01:40, 2.03it/s]
|
1325 |
98%|ββββββββββ| 10480/10682 [1:28:56<01:39, 2.03it/s]
|
1326 |
98%|ββββββββββ| 10481/10682 [1:28:57<01:38, 2.03it/s]
|
1327 |
98%|ββββββββββ| 10482/10682 [1:28:57<01:38, 2.03it/s]
|
1328 |
98%|ββββββββββ| 10483/10682 [1:28:58<01:37, 2.03it/s]
|
1329 |
98%|ββββββββββ| 10484/10682 [1:28:58<01:37, 2.03it/s]
|
1330 |
98%|ββββββββββ| 10485/10682 [1:28:59<01:37, 2.03it/s]
|
1331 |
98%|ββββββββββ| 10486/10682 [1:28:59<01:36, 2.03it/s]
|
1332 |
98%|ββββββββββ| 10487/10682 [1:29:00<01:36, 2.03it/s]
|
1333 |
98%|ββββββββββ| 10488/10682 [1:29:00<01:35, 2.03it/s]
|
1334 |
98%|ββββββββββ| 10489/10682 [1:29:01<01:35, 2.03it/s]
|
1335 |
98%|ββββββββοΏ½οΏ½β| 10490/10682 [1:29:01<01:34, 2.03it/s]
|
1336 |
98%|ββββββββββ| 10491/10682 [1:29:02<01:34, 2.03it/s]
|
1337 |
98%|ββββββββββ| 10492/10682 [1:29:02<01:33, 2.03it/s]
|
1338 |
98%|ββββββββββ| 10493/10682 [1:29:03<01:33, 2.03it/s]
|
1339 |
98%|ββββββββββ| 10494/10682 [1:29:03<01:32, 2.03it/s]
|
1340 |
98%|ββββββββββ| 10495/10682 [1:29:04<01:32, 2.03it/s]
|
1341 |
98%|ββββββββββ| 10496/10682 [1:29:04<01:31, 2.03it/s]
|
1342 |
98%|ββββββββββ| 10497/10682 [1:29:05<01:31, 2.03it/s]
|
1343 |
98%|ββββββββββ| 10498/10682 [1:29:05<01:30, 2.03it/s]
|
1344 |
98%|ββββββββββ| 10499/10682 [1:29:06<01:30, 2.03it/s]
|
1345 |
98%|ββββββββββ| 10500/10682 [1:29:06<01:29, 2.03it/s]{'loss': 2.779, 'grad_norm': 0.2600437104701996, 'learning_rate': 8.841716933915555e-07, 'epoch': 13.76}
|
1346 |
|
|
|
1347 |
98%|ββββββββββ| 10500/10682 [1:29:06<01:29, 2.03it/s]
|
1348 |
98%|ββββββββββ| 10501/10682 [1:29:07<01:29, 2.02it/s]
|
1349 |
98%|ββββββββββ| 10502/10682 [1:29:07<01:28, 2.03it/s]
|
1350 |
98%|ββββββββββ| 10503/10682 [1:29:08<01:28, 2.02it/s]
|
1351 |
98%|ββββββββββ| 10504/10682 [1:29:08<01:27, 2.02it/s]
|
1352 |
98%|ββββββββββ| 10505/10682 [1:29:09<01:27, 2.03it/s]
|
1353 |
98%|ββββββββββ| 10506/10682 [1:29:09<01:26, 2.03it/s]
|
1354 |
98%|ββββββββββ| 10507/10682 [1:29:10<01:26, 2.03it/s]
|
1355 |
98%|ββββββββββ| 10508/10682 [1:29:10<01:25, 2.03it/s]
|
1356 |
98%|ββββββββββ| 10509/10682 [1:29:11<01:25, 2.03it/s]
|
1357 |
98%|ββββββββββ| 10510/10682 [1:29:11<01:24, 2.03it/s]
|
1358 |
98%|ββββββββββ| 10511/10682 [1:29:12<01:24, 2.03it/s]
|
1359 |
98%|ββββββββββ| 10512/10682 [1:29:12<01:23, 2.03it/s]
|
1360 |
98%|ββββββββββ| 10513/10682 [1:29:13<01:23, 2.03it/s]
|
1361 |
98%|ββββββββββ| 10514/10682 [1:29:13<01:22, 2.03it/s]
|
1362 |
98%|ββββββββββ| 10515/10682 [1:29:13<01:22, 2.03it/s]
|
1363 |
98%|ββββββββββ| 10516/10682 [1:29:14<01:21, 2.03it/s]
|
1364 |
98%|ββββββββββ| 10517/10682 [1:29:14<01:21, 2.03it/s]
|
1365 |
98%|ββββββββββ| 10518/10682 [1:29:15<01:20, 2.03it/s]
|
1366 |
98%|ββββββββββ| 10519/10682 [1:29:15<01:20, 2.03it/s]
|
1367 |
98%|ββββββββββ| 10520/10682 [1:29:16<01:19, 2.03it/s]
|
1368 |
98%|ββββββββββ| 10521/10682 [1:29:16<01:19, 2.03it/s]
|
1369 |
99%|ββββββββββ| 10522/10682 [1:29:17<01:18, 2.03it/s]
|
1370 |
99%|ββββββββββ| 10523/10682 [1:29:17<01:18, 2.03it/s]
|
1371 |
99%|ββββββββββ| 10524/10682 [1:29:18<01:17, 2.03it/s]
|
1372 |
99%|ββββββββββ| 10525/10682 [1:29:18<01:17, 2.03it/s]
|
1373 |
|
|
|
1374 |
99%|ββββββββββ| 10525/10682 [1:29:18<01:17, 2.03it/s]
|
1375 |
99%|ββββββββββ| 10526/10682 [1:29:19<01:16, 2.03it/s]
|
1376 |
99%|ββββββββββ| 10527/10682 [1:29:19<01:16, 2.03it/s]
|
1377 |
99%|ββββββββββ| 10528/10682 [1:29:20<01:15, 2.03it/s]
|
1378 |
99%|ββββββββββ| 10529/10682 [1:29:20<01:15, 2.03it/s]
|
1379 |
99%|ββββββββββ| 10530/10682 [1:29:21<01:14, 2.03it/s]
|
1380 |
99%|ββββββββββ| 10531/10682 [1:29:21<01:14, 2.03it/s]
|
1381 |
99%|ββββββββββ| 10532/10682 [1:29:22<01:13, 2.03it/s]
|
1382 |
99%|ββββββββββ| 10533/10682 [1:29:22<01:13, 2.03it/s]
|
1383 |
99%|ββββββββββ| 10534/10682 [1:29:23<01:12, 2.03it/s]
|
1384 |
99%|ββββββββββ| 10535/10682 [1:29:23<01:12, 2.03it/s]
|
1385 |
99%|ββββββββββ| 10536/10682 [1:29:24<01:11, 2.03it/s]
|
1386 |
99%|ββββββββββ| 10537/10682 [1:29:24<01:11, 2.03it/s]
|
1387 |
99%|ββββββββββ| 10538/10682 [1:29:25<01:10, 2.03it/s]
|
1388 |
99%|ββββββββββ| 10539/10682 [1:29:25<01:10, 2.03it/s]
|
1389 |
99%|ββββββββββ| 10540/10682 [1:29:26<01:09, 2.03it/s]
|
1390 |
99%|ββββββββββ| 10541/10682 [1:29:26<01:09, 2.03it/s]
|
1391 |
99%|ββββββββββ| 10542/10682 [1:29:27<01:08, 2.03it/s]
|
1392 |
99%|ββββββββββ| 10543/10682 [1:29:27<01:08, 2.03it/s]
|
1393 |
99%|ββββββββββ| 10544/10682 [1:29:28<01:08, 2.03it/s]
|
1394 |
99%|ββββββββββ| 10545/10682 [1:29:28<01:07, 2.03it/s]
|
1395 |
99%|ββββββββββ| 10546/10682 [1:29:29<01:07, 2.03it/s]
|
1396 |
99%|ββββββββββ| 10547/10682 [1:29:29<01:06, 2.03it/s]
|
1397 |
99%|ββββββββββ| 10548/10682 [1:29:30<01:05, 2.03it/s]
|
1398 |
99%|ββββββββββ| 10549/10682 [1:29:30<01:05, 2.03it/s]
|
1399 |
99%|ββββββββββ| 10550/10682 [1:29:31<01:04, 2.03it/s]{'loss': 2.7851, 'grad_norm': 0.26254361867904663, 'learning_rate': 4.651600211027507e-07, 'epoch': 13.83}
|
1400 |
|
|
|
1401 |
99%|ββββββββββ| 10550/10682 [1:29:31<01:04, 2.03it/s]
|
1402 |
99%|ββββββββββ| 10551/10682 [1:29:31<01:04, 2.03it/s]
|
1403 |
99%|ββββββββββ| 10552/10682 [1:29:32<01:04, 2.03it/s]
|
1404 |
99%|ββββββββββ| 10553/10682 [1:29:32<01:03, 2.03it/s]
|
1405 |
99%|ββββββββββ| 10554/10682 [1:29:33<01:03, 2.03it/s]
|
1406 |
99%|ββββββββββ| 10555/10682 [1:29:33<01:02, 2.03it/s]
|
1407 |
99%|ββββββββββ| 10556/10682 [1:29:34<01:02, 2.03it/s]
|
1408 |
99%|ββββββββββ| 10557/10682 [1:29:34<01:01, 2.03it/s]
|
1409 |
99%|ββββββββββ| 10558/10682 [1:29:35<01:01, 2.03it/s]
|
1410 |
99%|ββββββββββ| 10559/10682 [1:29:35<01:00, 2.03it/s]
|
1411 |
99%|ββββββββββ| 10560/10682 [1:29:36<01:00, 2.03it/s]
|
1412 |
99%|ββββββββββ| 10561/10682 [1:29:36<00:59, 2.03it/s]
|
1413 |
99%|ββββββββββ| 10562/10682 [1:29:37<00:59, 2.03it/s]
|
1414 |
99%|ββββββββββ| 10563/10682 [1:29:37<00:58, 2.03it/s]
|
1415 |
99%|ββββββββββ| 10564/10682 [1:29:38<00:58, 2.03it/s]
|
1416 |
99%|ββββββββββ| 10565/10682 [1:29:38<00:57, 2.03it/s]
|
1417 |
99%|ββββββββββ| 10566/10682 [1:29:39<00:57, 2.03it/s]
|
1418 |
99%|ββββββββββ| 10567/10682 [1:29:39<00:56, 2.03it/s]
|
1419 |
99%|ββββββββββ| 10568/10682 [1:29:40<00:56, 2.03it/s]
|
1420 |
99%|ββββββββββ| 10569/10682 [1:29:40<00:55, 2.03it/s]
|
1421 |
99%|ββββββββββ| 10570/10682 [1:29:41<00:55, 2.03it/s]
|
1422 |
99%|ββββββββββ| 10571/10682 [1:29:41<00:54, 2.03it/s]
|
1423 |
99%|ββββββββββ| 10572/10682 [1:29:42<00:54, 2.03it/s]
|
1424 |
99%|ββββββββββ| 10573/10682 [1:29:42<00:53, 2.03it/s]
|
1425 |
99%|ββββββββββ| 10574/10682 [1:29:43<00:53, 2.03it/s]
|
1426 |
99%|ββββββββββ| 10575/10682 [1:29:43<00:52, 2.03it/s]
|
1427 |
{'loss': 2.7812, 'grad_norm': 0.2619117498397827, 'learning_rate': 3.0566461813213986e-07, 'epoch': 13.86}
|
|
|
1428 |
99%|ββββββββββ| 10575/10682 [1:29:43<00:52, 2.03it/s]
|
1429 |
99%|ββββββββββ| 10576/10682 [1:29:44<00:52, 2.03it/s]
|
1430 |
99%|ββββββββββ| 10577/10682 [1:29:44<00:51, 2.03it/s]
|
1431 |
99%|ββββββββββ| 10578/10682 [1:29:45<00:51, 2.03it/s]
|
1432 |
99%|ββββββββββ| 10579/10682 [1:29:45<00:50, 2.03it/s]
|
1433 |
99%|ββββββββββ| 10580/10682 [1:29:46<00:50, 2.03it/s]
|
1434 |
99%|ββββββββββ| 10581/10682 [1:29:46<00:49, 2.03it/s]
|
1435 |
99%|ββββββββββ| 10582/10682 [1:29:46<00:49, 2.03it/s]
|
1436 |
99%|ββββββββββ| 10583/10682 [1:29:47<00:48, 2.03it/s]
|
1437 |
99%|ββββββββββ| 10584/10682 [1:29:47<00:48, 2.03it/s]
|
1438 |
99%|ββββββββββ| 10585/10682 [1:29:48<00:47, 2.03it/s]
|
1439 |
99%|ββββββββββ| 10586/10682 [1:29:48<00:47, 2.03it/s]
|
1440 |
99%|ββββββββββ| 10587/10682 [1:29:49<00:46, 2.03it/s]
|
1441 |
99%|ββββββββββ| 10588/10682 [1:29:49<00:46, 2.03it/s]
|
1442 |
99%|ββββββββββ| 10589/10682 [1:29:50<00:45, 2.03it/s]
|
1443 |
99%|ββββββββββ| 10590/10682 [1:29:50<00:45, 2.03it/s]
|
1444 |
99%|ββββββββββ| 10591/10682 [1:29:51<00:44, 2.03it/s]
|
1445 |
99%|ββββββββββ| 10592/10682 [1:29:51<00:44, 2.03it/s]
|
1446 |
99%|ββββββββββ| 10593/10682 [1:29:52<00:43, 2.03it/s]
|
1447 |
99%|ββββββββββ| 10594/10682 [1:29:52<00:43, 2.03it/s]
|
1448 |
99%|ββββββββββ| 10595/10682 [1:29:53<00:42, 2.03it/s]
|
1449 |
99%|ββββββββββ| 10596/10682 [1:29:53<00:42, 2.03it/s]
|
1450 |
99%|ββββββββββ| 10597/10682 [1:29:54<00:41, 2.03it/s]
|
1451 |
99%|ββββββββββ| 10598/10682 [1:29:54<00:41, 2.03it/s]
|
1452 |
99%|ββββββββββ| 10599/10682 [1:29:55<00:40, 2.03it/s]
|
1453 |
99%|ββββββββββ| 10600/10682 [1:29:55<00:40, 2.03it/s]
|
1454 |
{'loss': 2.7727, 'grad_norm': 0.2640470564365387, 'learning_rate': 1.7952444123359167e-07, 'epoch': 13.89}
|
|
|
1455 |
99%|ββββββββββ| 10600/10682 [1:29:55<00:40, 2.03it/s]
|
1456 |
99%|ββββββββββ| 10601/10682 [1:29:56<00:40, 2.02it/s]
|
1457 |
99%|ββββββββββ| 10602/10682 [1:29:56<00:39, 2.03it/s]
|
1458 |
99%|ββββββββββ| 10603/10682 [1:29:57<00:39, 2.02it/s]
|
1459 |
99%|ββββββββββ| 10604/10682 [1:29:57<00:38, 2.03it/s]
|
1460 |
99%|ββββββββββ| 10605/10682 [1:29:58<00:37, 2.03it/s]
|
1461 |
99%|ββββββββββ| 10606/10682 [1:29:58<00:37, 2.03it/s]
|
1462 |
99%|ββββββββββ| 10607/10682 [1:29:59<00:36, 2.03it/s]
|
1463 |
99%|ββββββββββ| 10608/10682 [1:29:59<00:36, 2.03it/s]
|
1464 |
99%|ββββββοΏ½οΏ½οΏ½βββ| 10609/10682 [1:30:00<00:35, 2.03it/s]
|
1465 |
99%|ββββββββββ| 10610/10682 [1:30:00<00:35, 2.03it/s]
|
1466 |
99%|ββββββββββ| 10611/10682 [1:30:01<00:34, 2.03it/s]
|
1467 |
99%|ββββββββββ| 10612/10682 [1:30:01<00:34, 2.03it/s]
|
1468 |
99%|ββββββββββ| 10613/10682 [1:30:02<00:34, 2.03it/s]
|
1469 |
99%|ββββββββββ| 10614/10682 [1:30:02<00:33, 2.03it/s]
|
1470 |
99%|ββββββββββ| 10615/10682 [1:30:03<00:33, 2.03it/s]
|
1471 |
99%|ββββββββββ| 10616/10682 [1:30:03<00:32, 2.03it/s]
|
1472 |
99%|ββββββββββ| 10617/10682 [1:30:04<00:32, 2.03it/s]
|
1473 |
99%|ββββββββββ| 10618/10682 [1:30:04<00:31, 2.03it/s]
|
1474 |
99%|ββββββββββ| 10619/10682 [1:30:05<00:31, 2.03it/s]
|
1475 |
99%|ββββββββββ| 10620/10682 [1:30:05<00:30, 2.03it/s]
|
1476 |
99%|ββββββββββ| 10621/10682 [1:30:06<00:30, 2.03it/s]
|
1477 |
99%|ββββββββββ| 10622/10682 [1:30:06<00:29, 2.03it/s]
|
1478 |
99%|ββββββββββ| 10623/10682 [1:30:07<00:29, 2.03it/s]
|
1479 |
99%|ββββββββββ| 10624/10682 [1:30:07<00:28, 2.03it/s]
|
1480 |
99%|ββββββββββ| 10625/10682 [1:30:08<00:28, 2.03it/s]
|
1481 |
{'loss': 2.7884, 'grad_norm': 0.2641913592815399, 'learning_rate': 8.674791042273533e-08, 'epoch': 13.93}
|
|
|
1482 |
99%|ββββββββββ| 10625/10682 [1:30:08<00:28, 2.03it/s]
|
1483 |
99%|ββββββββββ| 10626/10682 [1:30:08<00:27, 2.03it/s]
|
1484 |
99%|ββββββββββ| 10627/10682 [1:30:09<00:27, 2.03it/s]
|
1485 |
99%|ββββββββββ| 10628/10682 [1:30:09<00:26, 2.03it/s]
|
1486 |
|
|
|
1487 |
{'loss': 2.7799, 'grad_norm': 0.26377299427986145, 'learning_rate': 1.3083313863404555e-09, 'epoch': 13.99}
|
|
|
|
|
1488 |
|
|
|
|
618 |
|
619 |
92%|ββββββββββ| 9850/10682 [1:23:33<06:49, 2.03it/s]
|
620 |
92%|ββββββββββ| 9851/10682 [1:23:34<06:49, 2.03it/s]
|
621 |
92%|ββββββββββ| 9852/10682 [1:23:34<06:49, 2.03it/s]
|
622 |
92%|ββββββββββ| 9853/10682 [1:23:35<06:49, 2.03it/s]
|
623 |
92%|ββββββββββ| 9854/10682 [1:23:35<06:48, 2.03it/s]
|
624 |
92%|ββββββββββ| 9855/10682 [1:23:36<06:48, 2.03it/s]
|
625 |
92%|ββββββββββ| 9856/10682 [1:23:36<06:47, 2.03it/s]
|
626 |
92%|ββββββββββ| 9857/10682 [1:23:37<06:46, 2.03it/s]
|
627 |
92%|ββββββββββ| 9858/10682 [1:23:37<06:46, 2.03it/s]
|
628 |
92%|ββββββββββ| 9859/10682 [1:23:38<06:45, 2.03it/s]
|
629 |
92%|ββββββββββ| 9860/10682 [1:23:38<06:44, 2.03it/s]
|
630 |
92%|ββββββββββ| 9861/10682 [1:23:39<06:44, 2.03it/s]
|
631 |
92%|ββββββββββ| 9862/10682 [1:23:39<06:43, 2.03it/s]
|
632 |
92%|ββββββββββ| 9863/10682 [1:23:40<06:43, 2.03it/s]
|
633 |
92%|ββββββββββ| 9864/10682 [1:23:40<06:42, 2.03it/s]
|
634 |
92%|ββββββββββ| 9865/10682 [1:23:41<06:42, 2.03it/s]
|
635 |
92%|ββββββββββ| 9866/10682 [1:23:41<06:42, 2.03it/s]
|
636 |
92%|ββββββββββ| 9867/10682 [1:23:42<06:41, 2.03it/s]
|
637 |
92%|ββββββββββ| 9868/10682 [1:23:42<06:41, 2.03it/s]
|
638 |
92%|ββββββββββ| 9869/10682 [1:23:43<06:40, 2.03it/s]
|
639 |
92%|ββββββββββ| 9870/10682 [1:23:43<06:39, 2.03it/s]
|
640 |
92%|ββββββββββ| 9871/10682 [1:23:44<06:39, 2.03it/s]
|
641 |
92%|ββββββββββ| 9872/10682 [1:23:44<06:38, 2.03it/s]
|
642 |
92%|ββββββββββ| 9873/10682 [1:23:45<06:38, 2.03it/s]
|
643 |
92%|ββββββββββ| 9874/10682 [1:23:45<06:38, 2.03it/s]
|
644 |
92%|ββββββββββ| 9875/10682 [1:23:46<06:37, 2.03it/s]
|
645 |
{'loss': 2.8104, 'grad_norm': 0.26470834016799927, 'learning_rate': 1.7288216615031272e-05, 'epoch': 12.94}
|
646 |
|
647 |
92%|ββββββββββ| 9875/10682 [1:23:46<06:37, 2.03it/s]
|
648 |
92%|ββββββββββ| 9876/10682 [1:23:46<06:37, 2.03it/s]
|
649 |
92%|ββββββββββ| 9877/10682 [1:23:47<06:36, 2.03it/s]
|
650 |
92%|ββββββββββ| 9878/10682 [1:23:47<06:36, 2.03it/s]
|
651 |
92%|ββββββββββ| 9879/10682 [1:23:48<06:35, 2.03it/s]
|
652 |
92%|ββββββββββ| 9880/10682 [1:23:48<06:35, 2.03it/s]
|
653 |
93%|ββββββββββ| 9881/10682 [1:23:49<06:34, 2.03it/s]
|
654 |
93%|ββββββββββ| 9882/10682 [1:23:49<06:34, 2.03it/s]
|
655 |
93%|ββββββββββ| 9883/10682 [1:23:50<06:34, 2.03it/s]
|
656 |
93%|ββββββββββ| 9884/10682 [1:23:50<06:33, 2.03it/s]
|
657 |
93%|ββββββββββ| 9885/10682 [1:23:51<06:33, 2.03it/s]
|
658 |
93%|ββββββββββ| 9886/10682 [1:23:51<06:32, 2.03it/s]
|
659 |
93%|ββββββββββ| 9887/10682 [1:23:52<06:32, 2.03it/s]
|
660 |
93%|ββββββββββ| 9888/10682 [1:23:52<06:31, 2.03it/s]
|
661 |
93%|ββββββββββ| 9889/10682 [1:23:53<06:30, 2.03it/s]
|
662 |
93%|ββββββββββ| 9890/10682 [1:23:53<06:30, 2.03it/s]
|
663 |
93%|ββββββββββ| 9891/10682 [1:23:54<06:29, 2.03it/s]
|
664 |
93%|ββββββββββ| 9892/10682 [1:23:54<06:29, 2.03it/s]
|
665 |
93%|ββββββββββ| 9893/10682 [1:23:55<06:29, 2.03it/s]
|
666 |
93%|ββββββββββ| 9894/10682 [1:23:55<06:28, 2.03it/s]
|
667 |
93%|ββββββββββ| 9895/10682 [1:23:56<06:28, 2.03it/s]
|
668 |
93%|ββββββββββ| 9896/10682 [1:23:56<06:27, 2.03it/s]
|
669 |
93%|ββββββββββ| 9897/10682 [1:23:57<06:27, 2.03it/s]
|
670 |
93%|ββββββββββ| 9898/10682 [1:23:57<06:26, 2.03it/s]
|
671 |
93%|ββββββββββ| 9899/10682 [1:23:58<06:25, 2.03it/s]
|
672 |
93%|ββββββββββ| 9900/10682 [1:23:58<06:25, 2.03it/s]
|
673 |
|
674 |
|
675 |
93%|ββββββββββ| 9900/10682 [1:23:58<06:25, 2.03it/s]
|
676 |
93%|ββββββββββ| 9901/10682 [1:23:59<06:25, 2.02it/s]
|
677 |
93%|ββββββββββ| 9902/10682 [1:23:59<06:24, 2.03it/s]
|
678 |
93%|ββββββββββ| 9903/10682 [1:24:00<06:24, 2.03it/s]
|
679 |
93%|ββββββββββ| 9904/10682 [1:24:00<06:23, 2.03it/s]
|
680 |
93%|ββββββββββ| 9905/10682 [1:24:01<06:22, 2.03it/s]
|
681 |
93%|ββββββββββ| 9906/10682 [1:24:01<06:22, 2.03it/s]
|
682 |
93%|ββββββββββ| 9907/10682 [1:24:02<06:21, 2.03it/s]
|
683 |
93%|ββββββββββ| 9908/10682 [1:24:02<06:20, 2.03it/s]
|
684 |
93%|ββββββββββ| 9909/10682 [1:24:03<06:21, 2.03it/s]
|
685 |
93%|ββββββββββ| 9910/10682 [1:24:03<06:20, 2.03it/s]
|
686 |
93%|ββββββββββ| 9911/10682 [1:24:04<06:20, 2.03it/s]
|
687 |
93%|ββββββββββ| 9912/10682 [1:24:04<06:19, 2.03it/s]
|
688 |
93%|ββββββββββ| 9913/10682 [1:24:05<06:18, 2.03it/s]
|
689 |
93%|ββββββββββ| 9914/10682 [1:24:05<06:18, 2.03it/s]
|
690 |
93%|ββββββββββ| 9915/10682 [1:24:05<06:17, 2.03it/s]
|
691 |
93%|ββββββββββ| 9916/10682 [1:24:06<06:17, 2.03it/s]
|
692 |
93%|ββββββββββ| 9917/10682 [1:24:06<06:16, 2.03it/s]
|
693 |
93%|ββββββββββ| 9918/10682 [1:24:07<06:16, 2.03it/s]
|
694 |
93%|ββββββββββ| 9919/10682 [1:24:07<06:12, 2.05it/s]
|
695 |
93%|ββββββββββ| 9920/10682 [1:24:20<51:57, 4.09s/it]
|
696 |
93%|ββββββββββ| 9921/10682 [1:24:20<38:11, 3.01s/it]
|
697 |
93%|ββββββββββ| 9922/10682 [1:24:21<28:34, 2.26s/it]
|
698 |
93%|ββββββββββ| 9923/10682 [1:24:21<21:52, 1.73s/it]
|
699 |
93%|ββββββββββ| 9924/10682 [1:24:22<17:09, 1.36s/it]
|
700 |
93%|ββββββββββ| 9925/10682 [1:24:22<13:50, 1.10s/it]{'loss': 2.8021, 'grad_norm': 0.26082977652549744, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.01}
|
701 |
|
|
|
702 |
93%|ββββββββββ| 9925/10682 [1:24:22<13:50, 1.10s/it]
|
703 |
93%|ββββββββββ| 9926/10682 [1:24:23<11:33, 1.09it/s]
|
704 |
93%|ββββββββββ| 9927/10682 [1:24:23<09:56, 1.27it/s]
|
705 |
93%|ββββββββββ| 9928/10682 [1:24:24<08:49, 1.43it/s]
|
706 |
93%|ββββββββββ| 9929/10682 [1:24:24<08:01, 1.56it/s]
|
707 |
93%|ββββββββββ| 9930/10682 [1:24:25<07:27, 1.68it/s]
|
708 |
93%|ββββββββββ| 9931/10682 [1:24:25<07:04, 1.77it/s]
|
709 |
93%|ββββββββββ| 9932/10682 [1:24:26<06:47, 1.84it/s]
|
710 |
93%|ββββββββββ| 9933/10682 [1:24:26<06:35, 1.89it/s]
|
711 |
93%|ββββββββββ| 9934/10682 [1:24:27<06:27, 1.93it/s]
|
712 |
93%|ββββββββββ| 9935/10682 [1:24:27<06:21, 1.96it/s]
|
713 |
93%|ββββββββββ| 9936/10682 [1:24:28<06:17, 1.98it/s]
|
714 |
93%|ββββββββββ| 9937/10682 [1:24:28<06:14, 1.99it/s]
|
715 |
93%|ββββββββββ| 9938/10682 [1:24:29<06:11, 2.00it/s]
|
716 |
93%|ββββββββββ| 9939/10682 [1:24:29<06:10, 2.01it/s]
|
717 |
93%|ββββββββββ| 9940/10682 [1:24:30<06:08, 2.01it/s]
|
718 |
93%|ββββββββββ| 9941/10682 [1:24:30<06:07, 2.01it/s]
|
719 |
93%|ββββββββββ| 9942/10682 [1:24:31<06:06, 2.02it/s]
|
720 |
93%|ββββββββββ| 9943/10682 [1:24:31<06:07, 2.01it/s]
|
721 |
93%|ββββββββββ| 9944/10682 [1:24:32<06:06, 2.01it/s]
|
722 |
93%|ββββββββββ| 9945/10682 [1:24:32<06:05, 2.02it/s]
|
723 |
93%|ββββββββββ| 9946/10682 [1:24:33<06:04, 2.02it/s]
|
724 |
93%|ββββββββββ| 9947/10682 [1:24:33<06:03, 2.02it/s]
|
725 |
+
|
726 |
93%|ββββββββββ| 9925/10682 [1:24:22<13:50, 1.10s/it]
|
727 |
93%|ββββββββββ| 9926/10682 [1:24:23<11:33, 1.09it/s]
|
728 |
93%|ββββββββββ| 9927/10682 [1:24:23<09:56, 1.27it/s]
|
729 |
93%|ββββββββββ| 9928/10682 [1:24:24<08:49, 1.43it/s]
|
730 |
93%|ββββββββββ| 9929/10682 [1:24:24<08:01, 1.56it/s]
|
731 |
93%|ββββββββββ| 9930/10682 [1:24:25<07:27, 1.68it/s]
|
732 |
93%|ββββββββββ| 9931/10682 [1:24:25<07:04, 1.77it/s]
|
733 |
93%|ββββββββββ| 9932/10682 [1:24:26<06:47, 1.84it/s]
|
734 |
93%|ββββββββββ| 9933/10682 [1:24:26<06:35, 1.89it/s]
|
735 |
93%|ββββββββββ| 9934/10682 [1:24:27<06:27, 1.93it/s]
|
736 |
93%|ββββββββββ| 9935/10682 [1:24:27<06:21, 1.96it/s]
|
737 |
93%|ββββββββββ| 9936/10682 [1:24:28<06:17, 1.98it/s]
|
738 |
93%|ββββββββββ| 9937/10682 [1:24:28<06:14, 1.99it/s]
|
739 |
93%|ββββββββββ| 9938/10682 [1:24:29<06:11, 2.00it/s]
|
740 |
93%|ββββββββββ| 9939/10682 [1:24:29<06:10, 2.01it/s]
|
741 |
93%|ββββββββββ| 9940/10682 [1:24:30<06:08, 2.01it/s]
|
742 |
93%|ββββββββββ| 9941/10682 [1:24:30<06:07, 2.01it/s]
|
743 |
93%|ββββββββββ| 9942/10682 [1:24:31<06:06, 2.02it/s]
|
744 |
93%|ββββββββββ| 9943/10682 [1:24:31<06:07, 2.01it/s]
|
745 |
93%|ββββββββββ| 9944/10682 [1:24:32<06:06, 2.01it/s]
|
746 |
93%|ββββββββββ| 9945/10682 [1:24:32<06:05, 2.02it/s]
|
747 |
93%|ββββββββββ| 9946/10682 [1:24:33<06:04, 2.02it/s]
|
748 |
93%|ββββββββββ| 9947/10682 [1:24:33<06:03, 2.02it/s]
|
749 |
93%|ββββββββββ| 9948/10682 [1:24:34<06:02, 2.03it/s]
|
750 |
93%|ββββββββββ| 9949/10682 [1:24:34<06:01, 2.03it/s]
|
751 |
93%|ββββββββββ| 9950/10682 [1:24:35<06:00, 2.03it/s]{'loss': 2.7759, 'grad_norm': 0.26722249388694763, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.04}
|
752 |
|
753 |
+
|
754 |
93%|ββββββββββ| 9950/10682 [1:24:35<06:00, 2.03it/s]
|
755 |
93%|ββββββββββ| 9951/10682 [1:24:35<06:01, 2.02it/s]
|
756 |
93%|ββββββββββ| 9952/10682 [1:24:36<06:00, 2.03it/s]
|
757 |
93%|ββββββββββ| 9953/10682 [1:24:36<05:59, 2.03it/s]
|
758 |
93%|ββββββββββ| 9954/10682 [1:24:37<05:59, 2.03it/s]
|
759 |
93%|ββββββββββ| 9955/10682 [1:24:37<05:58, 2.03it/s]
|
760 |
93%|ββββββββββ| 9956/10682 [1:24:38<05:58, 2.03it/s]
|
761 |
93%|ββββββββββ| 9957/10682 [1:24:38<05:57, 2.03it/s]
|
762 |
93%|ββββββββββ| 9958/10682 [1:24:39<05:56, 2.03it/s]
|
763 |
93%|ββββββββββ| 9959/10682 [1:24:39<05:56, 2.03it/s]
|
764 |
93%|ββββββββββ| 9960/10682 [1:24:40<05:55, 2.03it/s]
|
765 |
93%|ββββββββββ| 9961/10682 [1:24:40<05:55, 2.03it/s]
|
766 |
93%|ββββββββββ| 9962/10682 [1:24:41<05:54, 2.03it/s]
|
767 |
93%|ββββββββββ| 9963/10682 [1:24:41<05:54, 2.03it/s]
|
768 |
93%|ββββββββββ| 9964/10682 [1:24:42<05:53, 2.03it/s]
|
769 |
93%|ββββββββββ| 9965/10682 [1:24:42<05:53, 2.03it/s]
|
770 |
93%|ββββββββββ| 9966/10682 [1:24:43<05:52, 2.03it/s]
|
771 |
93%|ββββββββββ| 9967/10682 [1:24:43<05:51, 2.03it/s]
|
772 |
93%|ββββββββββ| 9968/10682 [1:24:44<05:51, 2.03it/s]
|
773 |
93%|ββββββββββ| 9969/10682 [1:24:44<05:50, 2.03it/s]
|
774 |
93%|ββββββββββ| 9970/10682 [1:24:45<05:49, 2.03it/s]
|
775 |
93%|ββββββββββ| 9971/10682 [1:24:45<05:49, 2.03it/s]
|
776 |
93%|ββββββββββ| 9972/10682 [1:24:46<05:49, 2.03it/s]
|
777 |
93%|ββββββββββ| 9973/10682 [1:24:46<05:48, 2.03it/s]
|
778 |
93%|ββββββββββ| 9974/10682 [1:24:47<05:48, 2.03it/s]
|
779 |
93%|ββββββββββ| 9975/10682 [1:24:47<05:48, 2.03it/s]
|
780 |
|
781 |
+
|
782 |
93%|ββββββββββ| 9975/10682 [1:24:47<05:48, 2.03it/s]
|
783 |
93%|ββββββββββ| 9976/10682 [1:24:48<05:48, 2.03it/s]
|
784 |
93%|ββββββββββ| 9977/10682 [1:24:48<05:47, 2.03it/s]
|
785 |
93%|ββββββββββ| 9978/10682 [1:24:49<05:46, 2.03it/s]
|
786 |
93%|ββββββββββ| 9979/10682 [1:24:49<05:46, 2.03it/s]
|
787 |
93%|ββββββββββ| 9980/10682 [1:24:50<05:45, 2.03it/s]
|
788 |
93%|ββββββββββ| 9981/10682 [1:24:50<05:45, 2.03it/s]
|
789 |
93%|ββββββββββ| 9982/10682 [1:24:51<05:44, 2.03it/s]
|
790 |
93%|ββββββββββ| 9983/10682 [1:24:51<05:44, 2.03it/s]
|
791 |
93%|ββββββββββ| 9984/10682 [1:24:52<05:43, 2.03it/s]
|
792 |
93%|ββββββββββ| 9985/10682 [1:24:52<05:43, 2.03it/s]
|
793 |
93%|ββββββββββ| 9986/10682 [1:24:53<05:43, 2.03it/s]
|
794 |
93%|ββββββββββ| 9987/10682 [1:24:53<05:42, 2.03it/s]
|
795 |
94%|ββββββββββ| 9988/10682 [1:24:53<05:41, 2.03it/s]
|
796 |
94%|ββββββββββ| 9989/10682 [1:24:54<05:41, 2.03it/s]
|
797 |
94%|ββββββββββ| 9990/10682 [1:24:54<05:40, 2.03it/s]
|
798 |
94%|ββββββββββ| 9991/10682 [1:24:55<05:39, 2.03it/s]
|
799 |
94%|ββββββββββ| 9992/10682 [1:24:55<05:39, 2.03it/s]
|
800 |
94%|ββββββββββ| 9993/10682 [1:24:56<05:38, 2.03it/s]
|
801 |
94%|ββββββββββ| 9994/10682 [1:24:56<05:38, 2.03it/s]
|
802 |
94%|ββββββββββ| 9995/10682 [1:24:57<05:38, 2.03it/s]
|
803 |
94%|ββββββββββ| 9996/10682 [1:24:57<05:37, 2.03it/s]
|
804 |
94%|ββββββββββ| 9997/10682 [1:24:58<05:37, 2.03it/s]
|
805 |
94%|ββββββββββ| 9998/10682 [1:24:58<05:36, 2.03it/s]
|
806 |
94%|ββββββββββ| 9999/10682 [1:24:59<05:35, 2.03it/s]
|
807 |
94%|ββββββββββ| 10000/10682 [1:24:59<05:35, 2.03it/s]
|
808 |
|
809 |
+
|
810 |
94%|ββββββββββ| 10000/10682 [1:24:59<05:35, 2.03it/s]
|
811 |
94%|ββββββββββ| 10001/10682 [1:25:00<05:35, 2.03it/s]
|
812 |
94%|ββββββββββ| 10002/10682 [1:25:00<05:35, 2.03it/s]
|
813 |
94%|ββββββββββ| 10003/10682 [1:25:01<05:34, 2.03it/s]
|
814 |
94%|ββββββββββ| 10004/10682 [1:25:01<05:33, 2.03it/s]
|
815 |
94%|ββββββββββ| 10005/10682 [1:25:02<05:33, 2.03it/s]
|
816 |
94%|ββββββββββ| 10006/10682 [1:25:02<05:32, 2.03it/s]
|
817 |
94%|ββββββββββ| 10007/10682 [1:25:03<05:32, 2.03it/s]
|
818 |
94%|ββββββββββ| 10008/10682 [1:25:03<05:31, 2.03it/s]
|
819 |
94%|ββββββββββ| 10009/10682 [1:25:04<05:31, 2.03it/s]
|
820 |
94%|ββββββββββ| 10010/10682 [1:25:04<05:31, 2.03it/s]
|
821 |
94%|ββββββββββ| 10011/10682 [1:25:05<05:30, 2.03it/s]
|
822 |
94%|ββββββββββ| 10012/10682 [1:25:05<05:30, 2.03it/s]
|
823 |
94%|ββββββββββ| 10013/10682 [1:25:06<05:29, 2.03it/s]
|
824 |
94%|ββββββββββ| 10014/10682 [1:25:06<05:28, 2.03it/s]
|
825 |
94%|ββββββββββ| 10015/10682 [1:25:07<05:28, 2.03it/s]
|
826 |
94%|ββββββββββ| 10016/10682 [1:25:07<05:27, 2.03it/s]
|
827 |
94%|ββββββββββ| 10017/10682 [1:25:08<05:27, 2.03it/s]
|
828 |
94%|ββββββββββ| 10018/10682 [1:25:08<05:27, 2.03it/s]
|
829 |
94%|ββββββββββ| 10019/10682 [1:25:09<05:26, 2.03it/s]
|
830 |
94%|ββββββββββ| 10020/10682 [1:25:09<05:26, 2.03it/s]
|
831 |
94%|ββββββββββ| 10021/10682 [1:25:10<05:25, 2.03it/s]
|
832 |
94%|ββββββββββ| 10022/10682 [1:25:10<05:25, 2.03it/s]
|
833 |
94%|ββββββββββ| 10023/10682 [1:25:11<05:24, 2.03it/s]
|
834 |
94%|ββββββββββ| 10024/10682 [1:25:11<05:24, 2.03it/s]
|
835 |
94%|ββββββββββ| 10025/10682 [1:25:12<05:23, 2.03it/s]
|
836 |
|
837 |
+
|
838 |
94%|ββββββββββ| 10025/10682 [1:25:12<05:23, 2.03it/s]
|
839 |
94%|ββββββββββ| 10026/10682 [1:25:12<05:23, 2.03it/s]
|
840 |
94%|ββββββββββ| 10027/10682 [1:25:13<05:23, 2.03it/s]
|
841 |
94%|ββββββββββ| 10028/10682 [1:25:13<05:22, 2.03it/s]
|
842 |
94%|ββββββββββ| 10029/10682 [1:25:14<05:22, 2.03it/s]
|
843 |
94%|ββββββββββ| 10030/10682 [1:25:14<05:21, 2.03it/s]
|
844 |
94%|ββββββββββ| 10031/10682 [1:25:15<05:20, 2.03it/s]
|
845 |
94%|ββββββββββ| 10032/10682 [1:25:15<05:20, 2.03it/s]
|
846 |
94%|ββββββββββ| 10033/10682 [1:25:16<05:19, 2.03it/s]
|
847 |
94%|ββββββββββ| 10034/10682 [1:25:16<05:19, 2.03it/s]
|
848 |
94%|ββββββββββ| 10035/10682 [1:25:17<05:18, 2.03it/s]
|
849 |
94%|ββββββββββ| 10036/10682 [1:25:17<05:18, 2.03it/s]
|
850 |
94%|ββββββββββ| 10037/10682 [1:25:18<05:17, 2.03it/s]
|
851 |
94%|ββββββββββ| 10038/10682 [1:25:18<05:17, 2.03it/s]
|
852 |
94%|ββββββββββ| 10039/10682 [1:25:19<05:16, 2.03it/s]
|
853 |
94%|ββββββββββ| 10040/10682 [1:25:19<05:16, 2.03it/s]
|
854 |
94%|ββββββββββ| 10041/10682 [1:25:20<05:15, 2.03it/s]
|
855 |
94%|ββββββββββ| 10042/10682 [1:25:20<05:15, 2.03it/s]
|
856 |
94%|ββββββββββ| 10043/10682 [1:25:21<05:14, 2.03it/s]
|
857 |
94%|ββββββββββ| 10044/10682 [1:25:21<05:14, 2.03it/s]
|
858 |
94%|ββββββββββ| 10045/10682 [1:25:22<05:13, 2.03it/s]
|
859 |
94%|ββββββββββ| 10046/10682 [1:25:22<05:13, 2.03it/s]
|
860 |
94%|ββββββββββ| 10047/10682 [1:25:23<05:12, 2.03it/s]
|
861 |
94%|ββββββββββ| 10048/10682 [1:25:23<05:12, 2.03it/s]
|
862 |
94%|ββββββββββ| 10049/10682 [1:25:24<05:11, 2.03it/s]
|
863 |
94%|ββββββββββ| 10050/10682 [1:25:24<05:11, 2.03it/s]
|
864 |
{'loss': 2.7813, 'grad_norm': 0.2624559998512268, 'learning_rate': 1.0627021937013704e-05, 'epoch': 13.17}
|
865 |
+
|
866 |
94%|ββββββββββ| 10050/10682 [1:25:24<05:11, 2.03it/s]
|
867 |
94%|ββββββββββ| 10051/10682 [1:25:25<05:11, 2.03it/s]
|
868 |
94%|ββββββββββ| 10052/10682 [1:25:25<05:10, 2.03it/s]
|
869 |
94%|ββββββββββ| 10053/10682 [1:25:25<05:09, 2.03it/s]
|
870 |
94%|ββββββββββ| 10054/10682 [1:25:26<05:09, 2.03it/s]
|
871 |
94%|ββββββββββ| 10055/10682 [1:25:26<05:08, 2.03it/s]
|
872 |
94%|ββββββββββ| 10056/10682 [1:25:27<05:08, 2.03it/s]
|
873 |
94%|ββββββββββ| 10057/10682 [1:25:27<05:08, 2.03it/s]
|
874 |
94%|ββββββββββ| 10058/10682 [1:25:28<05:07, 2.03it/s]
|
875 |
94%|ββββββββββ| 10059/10682 [1:25:28<05:07, 2.03it/s]
|
876 |
94%|ββββββββββ| 10060/10682 [1:25:29<05:06, 2.03it/s]
|
877 |
94%|ββββββββββ| 10061/10682 [1:25:29<05:06, 2.03it/s]
|
878 |
94%|ββββββββββ| 10062/10682 [1:25:30<05:05, 2.03it/s]
|
879 |
94%|ββββββββββ| 10063/10682 [1:25:30<05:04, 2.03it/s]
|
880 |
94%|ββββββββββ| 10064/10682 [1:25:31<05:04, 2.03it/s]
|
881 |
94%|ββββββββββ| 10065/10682 [1:25:31<05:03, 2.03it/s]
|
882 |
94%|ββββββββββ| 10066/10682 [1:25:32<05:03, 2.03it/s]
|
883 |
94%|ββββββββββ| 10067/10682 [1:25:32<05:02, 2.03it/s]
|
884 |
94%|ββββββββββ| 10068/10682 [1:25:33<05:02, 2.03it/s]
|
885 |
94%|ββββββββββ| 10069/10682 [1:25:33<05:02, 2.03it/s]
|
886 |
94%|ββββββββββ| 10070/10682 [1:25:34<05:01, 2.03it/s]
|
887 |
94%|ββββββββββ| 10071/10682 [1:25:34<05:00, 2.03it/s]
|
888 |
94%|ββββββββββ| 10072/10682 [1:25:35<05:00, 2.03it/s]
|
889 |
94%|ββββββββββ| 10073/10682 [1:25:35<04:59, 2.03it/s]
|
890 |
94%|ββββββββββ| 10074/10682 [1:25:36<04:59, 2.03it/s]
|
891 |
94%|ββββββββββ| 10075/10682 [1:25:36<04:58, 2.03it/s]
|
892 |
|
893 |
+
|
894 |
94%|ββββββββββ| 10075/10682 [1:25:36<04:58, 2.03it/s]
|
895 |
94%|ββββββββββ| 10076/10682 [1:25:37<04:59, 2.03it/s]
|
896 |
94%|ββββββββββ| 10077/10682 [1:25:37<04:58, 2.03it/s]
|
897 |
94%|ββββββββββ| 10078/10682 [1:25:38<04:57, 2.03it/s]
|
898 |
94%|ββββββββββ| 10079/10682 [1:25:38<04:57, 2.03it/s]
|
899 |
94%|ββββββββββ| 10080/10682 [1:25:39<04:56, 2.03it/s]
|
900 |
94%|ββββββββββ| 10081/10682 [1:25:39<04:56, 2.03it/s]
|
901 |
94%|ββββββββββ| 10082/10682 [1:25:40<04:56, 2.03it/s]
|
902 |
94%|ββββββββββ| 10083/10682 [1:25:40<04:55, 2.02it/s]
|
903 |
94%|ββββββββββ| 10084/10682 [1:25:41<04:55, 2.03it/s]
|
904 |
94%|ββββββββββ| 10085/10682 [1:25:41<04:54, 2.03it/s]
|
905 |
94%|ββββββββββ| 10086/10682 [1:25:42<04:54, 2.03it/s]
|
906 |
94%|ββββββββββ| 10087/10682 [1:25:42<04:53, 2.03it/s]
|
907 |
94%|ββββββββββ| 10088/10682 [1:25:43<04:53, 2.03it/s]
|
908 |
94%|ββββββββββ| 10089/10682 [1:25:43<05:19, 1.86it/s]
|
909 |
94%|ββββββββββ| 10090/10682 [1:25:44<05:10, 1.91it/s]
|
910 |
94%|ββββββββββ| 10091/10682 [1:25:44<05:04, 1.94it/s]
|
911 |
94%|ββββββββββ| 10092/10682 [1:25:45<04:59, 1.97it/s]
|
912 |
94%|ββββββββββ| 10093/10682 [1:25:45<04:56, 1.99it/s]
|
913 |
94%|ββββββββββ| 10094/10682 [1:25:46<04:54, 2.00it/s]
|
914 |
95%|ββββββββββ| 10095/10682 [1:25:46<04:52, 2.01it/s]
|
915 |
95%|ββββββββββ| 10096/10682 [1:25:47<04:50, 2.01it/s]
|
916 |
95%|ββββββββββ| 10097/10682 [1:25:47<04:49, 2.02it/s]
|
917 |
95%|ββββββββββ| 10098/10682 [1:25:48<04:48, 2.02it/s]
|
918 |
95%|ββββββββββ| 10099/10682 [1:25:48<04:47, 2.03it/s]
|
919 |
95%|ββββββββββ| 10100/10682 [1:25:49<04:47, 2.03it/s]{'loss': 2.7786, 'grad_norm': 0.26479992270469666, 'learning_rate': 9.01691900753926e-06, 'epoch': 13.24}
|
920 |
+
|
921 |
|
922 |
95%|ββββββββββ| 10100/10682 [1:25:49<04:47, 2.03it/s]
|
923 |
95%|ββββββββββ| 10101/10682 [1:25:49<04:46, 2.03it/s]
|
924 |
95%|ββββββββββ| 10102/10682 [1:25:50<04:46, 2.03it/s]
|
925 |
95%|ββββββββββ| 10103/10682 [1:25:50<04:45, 2.03it/s]
|
926 |
95%|ββββββββββ| 10104/10682 [1:25:51<04:45, 2.03it/s]
|
927 |
95%|ββββββββββ| 10105/10682 [1:25:51<04:44, 2.03it/s]
|
928 |
95%|ββββββββββ| 10106/10682 [1:25:52<04:43, 2.03it/s]
|
929 |
95%|ββββββββββ| 10107/10682 [1:25:52<04:43, 2.03it/s]
|
930 |
95%|ββββββββββ| 10108/10682 [1:25:53<04:42, 2.03it/s]
|
931 |
95%|ββββββββββ| 10109/10682 [1:25:53<04:42, 2.03it/s]
|
932 |
95%|ββββββββββ| 10110/10682 [1:25:54<04:41, 2.03it/s]
|
933 |
95%|ββββββββββ| 10111/10682 [1:25:54<04:41, 2.03it/s]
|
934 |
95%|ββββββββββ| 10112/10682 [1:25:55<04:40, 2.03it/s]
|
935 |
95%|ββββββββββ| 10113/10682 [1:25:55<04:40, 2.03it/s]
|
936 |
95%|ββββββββββ| 10114/10682 [1:25:56<04:39, 2.03it/s]
|
937 |
95%|ββββββββββ| 10115/10682 [1:25:56<04:39, 2.03it/s]
|
938 |
95%|ββββββββββ| 10116/10682 [1:25:57<04:38, 2.03it/s]
|
939 |
95%|ββββββββββ| 10117/10682 [1:25:57<04:38, 2.03it/s]
|
940 |
95%|ββββββββββ| 10118/10682 [1:25:58<04:38, 2.03it/s]
|
941 |
95%|ββββββββββ| 10119/10682 [1:25:58<04:37, 2.03it/s]
|
942 |
95%|ββββββββββ| 10120/10682 [1:25:59<04:37, 2.03it/s]
|
943 |
95%|ββββββββββ| 10121/10682 [1:25:59<04:36, 2.03it/s]
|
944 |
95%|ββββββββββ| 10122/10682 [1:26:00<04:36, 2.03it/s]
|
945 |
95%|ββββββββββ| 10123/10682 [1:26:00<04:35, 2.03it/s]
|
946 |
95%|ββββββββββ| 10124/10682 [1:26:01<04:35, 2.03it/s]
|
947 |
95%|ββββββββββ| 10125/10682 [1:26:01<04:34, 2.03it/s]
|
948 |
|
949 |
+
|
950 |
95%|ββββββββββ| 10125/10682 [1:26:01<04:34, 2.03it/s]
|
951 |
95%|ββββββββββ| 10126/10682 [1:26:02<04:34, 2.03it/s]
|
952 |
95%|ββββββββββ| 10127/10682 [1:26:02<04:33, 2.03it/s]
|
953 |
95%|ββββββββββ| 10128/10682 [1:26:03<04:32, 2.03it/s]
|
954 |
95%|ββββββββββ| 10129/10682 [1:26:03<04:32, 2.03it/s]
|
955 |
95%|βββββββββοΏ½οΏ½| 10130/10682 [1:26:04<04:31, 2.03it/s]
|
956 |
95%|ββββββββββ| 10131/10682 [1:26:04<04:31, 2.03it/s]
|
957 |
95%|ββββββββββ| 10132/10682 [1:26:05<04:31, 2.03it/s]
|
958 |
95%|ββββββββββ| 10133/10682 [1:26:05<04:30, 2.03it/s]
|
959 |
95%|ββββββββββ| 10134/10682 [1:26:06<04:30, 2.03it/s]
|
960 |
95%|ββββββββββ| 10135/10682 [1:26:06<04:29, 2.03it/s]
|
961 |
95%|ββββββββββ| 10136/10682 [1:26:07<04:29, 2.03it/s]
|
962 |
95%|ββββββββββ| 10137/10682 [1:26:07<04:28, 2.03it/s]
|
963 |
95%|ββββββββββ| 10138/10682 [1:26:08<04:27, 2.03it/s]
|
964 |
95%|ββββββββββ| 10139/10682 [1:26:08<04:27, 2.03it/s]
|
965 |
95%|ββββββββββ| 10140/10682 [1:26:09<04:26, 2.03it/s]
|
966 |
95%|ββββββββββ| 10141/10682 [1:26:09<04:26, 2.03it/s]
|
967 |
95%|ββββββββββ| 10142/10682 [1:26:10<04:26, 2.03it/s]
|
968 |
95%|ββββββββββ| 10143/10682 [1:26:10<04:25, 2.03it/s]
|
969 |
95%|ββββββββββ| 10144/10682 [1:26:10<04:25, 2.03it/s]
|
970 |
95%|ββββββββββ| 10145/10682 [1:26:11<04:24, 2.03it/s]
|
971 |
95%|ββββββββββ| 10146/10682 [1:26:11<04:24, 2.03it/s]
|
972 |
95%|ββββββββββ| 10147/10682 [1:26:12<04:23, 2.03it/s]
|
973 |
95%|ββββββββββ| 10148/10682 [1:26:12<04:22, 2.03it/s]
|
974 |
95%|ββββββββββ| 10149/10682 [1:26:13<04:22, 2.03it/s]
|
975 |
95%|ββββββββββ| 10150/10682 [1:26:13<04:21, 2.03it/s]{'loss': 2.7769, 'grad_norm': 0.26382389664649963, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.3}
|
976 |
|
977 |
+
|
978 |
95%|ββββββββββ| 10150/10682 [1:26:13<04:21, 2.03it/s]
|
979 |
95%|ββββββββββ| 10151/10682 [1:26:14<04:21, 2.03it/s]
|
980 |
95%|ββββββββββ| 10152/10682 [1:26:14<04:21, 2.03it/s]
|
981 |
95%|ββββββββββ| 10153/10682 [1:26:15<04:21, 2.03it/s]
|
982 |
95%|ββββββββββ| 10154/10682 [1:26:15<04:20, 2.03it/s]
|
983 |
95%|ββββββββββ| 10155/10682 [1:26:16<04:19, 2.03it/s]
|
984 |
95%|ββββββββββ| 10156/10682 [1:26:16<04:19, 2.03it/s]
|
985 |
95%|ββββββββββ| 10157/10682 [1:26:17<04:18, 2.03it/s]
|
986 |
95%|ββββββββββ| 10158/10682 [1:26:17<04:18, 2.03it/s]
|
987 |
95%|ββββββββββ| 10159/10682 [1:26:18<04:17, 2.03it/s]
|
988 |
95%|ββββββββββ| 10160/10682 [1:26:18<04:17, 2.03it/s]
|
989 |
95%|ββββββββββ| 10161/10682 [1:26:19<04:16, 2.03it/s]
|
990 |
95%|ββββββββββ| 10162/10682 [1:26:19<04:16, 2.03it/s]
|
991 |
95%|ββββββββββ| 10163/10682 [1:26:20<04:15, 2.03it/s]
|
992 |
95%|ββββββββββ| 10164/10682 [1:26:20<04:15, 2.03it/s]
|
993 |
95%|ββββββββββ| 10165/10682 [1:26:21<04:15, 2.03it/s]
|
994 |
95%|ββββββββββ| 10166/10682 [1:26:21<04:14, 2.03it/s]
|
995 |
95%|ββββββββββ| 10167/10682 [1:26:22<04:14, 2.03it/s]
|
996 |
95%|ββββββββββ| 10168/10682 [1:26:22<04:13, 2.03it/s]
|
997 |
95%|ββββββββββ| 10169/10682 [1:26:23<04:12, 2.03it/s]
|
998 |
95%|ββββββββββ| 10170/10682 [1:26:23<04:12, 2.03it/s]
|
999 |
95%|ββββββββββ| 10171/10682 [1:26:24<04:11, 2.03it/s]
|
1000 |
95%|ββββββββββ| 10172/10682 [1:26:24<04:11, 2.03it/s]
|
1001 |
95%|ββββββββββ| 10173/10682 [1:26:25<04:10, 2.03it/s]
|
1002 |
95%|ββββββββββ| 10174/10682 [1:26:25<04:10, 2.03it/s]
|
1003 |
95%|ββββββββββ| 10175/10682 [1:26:26<04:09, 2.03it/s]{'loss': 2.7838, 'grad_norm': 0.26716604828834534, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.34}
|
1004 |
+
|
1005 |
|
1006 |
95%|ββββββββββ| 10175/10682 [1:26:26<04:09, 2.03it/s]
|
1007 |
95%|ββββββββββ| 10176/10682 [1:26:26<04:09, 2.03it/s]
|
1008 |
95%|ββββββββββ| 10177/10682 [1:26:27<04:08, 2.03it/s]
|
1009 |
95%|ββββββββββ| 10178/10682 [1:26:27<04:08, 2.03it/s]
|
1010 |
95%|ββββββββββ| 10179/10682 [1:26:28<04:07, 2.03it/s]
|
1011 |
95%|ββββββββββ| 10180/10682 [1:26:28<04:07, 2.03it/s]
|
1012 |
95%|ββββββββββ| 10181/10682 [1:26:29<04:06, 2.03it/s]
|
1013 |
95%|ββββββββββ| 10182/10682 [1:26:29<04:06, 2.03it/s]
|
1014 |
95%|ββββββββββ| 10183/10682 [1:26:30<04:06, 2.03it/s]
|
1015 |
95%|ββββββββββ| 10184/10682 [1:26:30<04:05, 2.03it/s]
|
1016 |
95%|ββββββββββ| 10185/10682 [1:26:31<04:04, 2.03it/s]
|
1017 |
95%|ββββββββββ| 10186/10682 [1:26:31<04:04, 2.03it/s]
|
1018 |
95%|ββββββββββ| 10187/10682 [1:26:32<04:03, 2.03it/s]
|
1019 |
95%|ββββββββββ| 10188/10682 [1:26:32<04:03, 2.03it/s]
|
1020 |
95%|ββββββββββ| 10189/10682 [1:26:33<04:02, 2.03it/s]
|
1021 |
95%|ββββββββββ| 10190/10682 [1:26:33<04:02, 2.03it/s]
|
1022 |
95%|ββββββββββ| 10191/10682 [1:26:34<04:02, 2.03it/s]
|
1023 |
95%|ββββββββββ| 10192/10682 [1:26:34<04:01, 2.03it/s]
|
1024 |
95%|ββββββββββ| 10193/10682 [1:26:35<04:01, 2.03it/s]
|
1025 |
95%|ββββββββββ| 10194/10682 [1:26:35<04:00, 2.03it/s]
|
1026 |
95%|ββββββββββ| 10195/10682 [1:26:36<04:00, 2.03it/s]
|
1027 |
95%|ββββββββββ| 10196/10682 [1:26:36<03:59, 2.03it/s]
|
1028 |
95%|ββββββββββ| 10197/10682 [1:26:37<03:59, 2.03it/s]
|
1029 |
95%|ββββββββββ| 10198/10682 [1:26:37<03:58, 2.03it/s]
|
1030 |
95%|ββββββββββ| 10199/10682 [1:26:38<03:58, 2.03it/s]
|
1031 |
95%|ββββββββββ| 10200/10682 [1:26:38<03:57, 2.03it/s]
|
1032 |
|
1033 |
+
|
1034 |
95%|ββββββββββ| 10200/10682 [1:26:38<03:57, 2.03it/s]
|
1035 |
95%|ββββββββββ| 10201/10682 [1:26:39<03:57, 2.03it/s]
|
1036 |
96%|ββββββββββ| 10202/10682 [1:26:39<03:56, 2.03it/s]
|
1037 |
96%|ββββββββββ| 10203/10682 [1:26:40<03:56, 2.03it/s]
|
1038 |
96%|ββββββββββ| 10204/10682 [1:26:40<03:55, 2.03it/s]
|
1039 |
96%|ββββββββββ| 10205/10682 [1:26:41<03:55, 2.03it/s]
|
1040 |
96%|ββββββββββ| 10206/10682 [1:26:41<03:54, 2.03it/s]
|
1041 |
96%|ββββββββββ| 10207/10682 [1:26:42<03:54, 2.03it/s]
|
1042 |
96%|ββββββββββ| 10208/10682 [1:26:42<03:53, 2.03it/s]
|
1043 |
96%|ββββββββββ| 10209/10682 [1:26:43<03:53, 2.03it/s]
|
1044 |
96%|ββββββββββ| 10210/10682 [1:26:43<03:52, 2.03it/s]
|
1045 |
96%|ββββββββββ| 10211/10682 [1:26:44<04:11, 1.87it/s]
|
1046 |
96%|ββββββββββ| 10212/10682 [1:26:44<04:05, 1.91it/s]
|
1047 |
96%|ββββββββββ| 10213/10682 [1:26:45<04:00, 1.95it/s]
|
1048 |
96%|ββββββββββ| 10214/10682 [1:26:45<03:57, 1.97it/s]
|
1049 |
96%|ββββββββββ| 10215/10682 [1:26:46<03:54, 1.99it/s]
|
1050 |
96%|ββββββββββ| 10216/10682 [1:26:46<03:53, 2.00it/s]
|
1051 |
96%|ββββββββββ| 10217/10682 [1:26:47<03:51, 2.01it/s]
|
1052 |
96%|ββββββββββ| 10218/10682 [1:26:47<03:50, 2.02it/s]
|
1053 |
96%|ββββββββββ| 10219/10682 [1:26:48<03:49, 2.02it/s]
|
1054 |
96%|ββββββββββ| 10220/10682 [1:26:48<03:48, 2.02it/s]
|
1055 |
96%|ββββββββββ| 10221/10682 [1:26:49<03:47, 2.03it/s]
|
1056 |
96%|ββββββββββ| 10222/10682 [1:26:49<03:47, 2.03it/s]
|
1057 |
96%|ββββββββββ| 10223/10682 [1:26:50<03:46, 2.03it/s]
|
1058 |
96%|ββββββββββ| 10224/10682 [1:26:50<03:46, 2.03it/s]
|
1059 |
96%|ββββββββββ| 10225/10682 [1:26:51<03:45, 2.03it/s]
|
1060 |
|
1061 |
+
|
1062 |
96%|ββββββββββ| 10225/10682 [1:26:51<03:45, 2.03it/s]
|
1063 |
96%|ββββββββββ| 10226/10682 [1:26:51<03:45, 2.02it/s]
|
1064 |
96%|ββββββββββ| 10227/10682 [1:26:52<03:44, 2.03it/s]
|
1065 |
96%|ββββββββββ| 10228/10682 [1:26:52<03:43, 2.03it/s]
|
1066 |
96%|ββββββββββ| 10229/10682 [1:26:53<03:43, 2.03it/s]
|
1067 |
96%|ββββββββββ| 10230/10682 [1:26:53<03:42, 2.03it/s]
|
1068 |
96%|ββββββββββ| 10231/10682 [1:26:54<03:42, 2.03it/s]
|
1069 |
96%|ββββββββββ| 10232/10682 [1:26:54<03:41, 2.03it/s]
|
1070 |
96%|ββββββββββ| 10233/10682 [1:26:55<03:41, 2.03it/s]
|
1071 |
96%|ββββββββββ| 10234/10682 [1:26:55<03:40, 2.03it/s]
|
1072 |
96%|ββββββββββ| 10235/10682 [1:26:55<03:40, 2.03it/s]
|
1073 |
96%|ββββββββββ| 10236/10682 [1:26:56<03:39, 2.03it/s]
|
1074 |
96%|ββββββββββ| 10237/10682 [1:26:56<03:39, 2.03it/s]
|
1075 |
96%|ββββββββββ| 10238/10682 [1:26:57<03:38, 2.03it/s]
|
1076 |
96%|ββββββββββ| 10239/10682 [1:26:57<03:37, 2.03it/s]
|
1077 |
96%|ββββββββββ| 10240/10682 [1:26:58<03:37, 2.03it/s]
|
1078 |
96%|ββββββββββ| 10241/10682 [1:26:58<03:37, 2.03it/s]
|
1079 |
96%|ββββββββββ| 10242/10682 [1:26:59<03:36, 2.03it/s]
|
1080 |
96%|ββββββββββ| 10243/10682 [1:26:59<03:36, 2.03it/s]
|
1081 |
96%|ββββββββββ| 10244/10682 [1:27:00<03:35, 2.03it/s]
|
1082 |
96%|ββββββββββ| 10245/10682 [1:27:00<03:35, 2.03it/s]
|
1083 |
96%|ββββββββββ| 10246/10682 [1:27:01<03:34, 2.03it/s]
|
1084 |
96%|ββββββββββ| 10247/10682 [1:27:01<03:34, 2.03it/s]
|
1085 |
96%|ββββββββββ| 10248/10682 [1:27:02<03:33, 2.03it/s]
|
1086 |
96%|ββββββββββ| 10249/10682 [1:27:02<03:33, 2.03it/s]
|
1087 |
96%|ββββββββββ| 10250/10682 [1:27:03<03:32, 2.03it/s]{'loss': 2.7947, 'grad_norm': 0.26169490814208984, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
1088 |
+
|
1089 |
|
1090 |
96%|ββββββββββ| 10250/10682 [1:27:03<03:32, 2.03it/s]
|
1091 |
96%|ββββββββββ| 10251/10682 [1:27:03<03:32, 2.03it/s]
|
1092 |
96%|ββββββββββ| 10252/10682 [1:27:04<03:31, 2.03it/s]
|
1093 |
96%|ββββββββββ| 10253/10682 [1:27:04<03:31, 2.03it/s]
|
1094 |
96%|ββββββββββ| 10254/10682 [1:27:05<03:31, 2.03it/s]
|
1095 |
96%|ββββββββββ| 10255/10682 [1:27:05<03:30, 2.03it/s]
|
1096 |
96%|ββββββββββ| 10256/10682 [1:27:06<03:30, 2.03it/s]
|
1097 |
96%|ββββββββββ| 10257/10682 [1:27:06<03:29, 2.03it/s]
|
1098 |
96%|ββββββββββ| 10258/10682 [1:27:07<03:28, 2.03it/s]
|
1099 |
96%|ββββββββββ| 10259/10682 [1:27:07<03:28, 2.03it/s]
|
1100 |
96%|ββββββββββ| 10260/10682 [1:27:08<03:28, 2.03it/s]
|
1101 |
96%|ββββββββββ| 10261/10682 [1:27:08<03:27, 2.03it/s]
|
1102 |
96%|ββββββββββ| 10262/10682 [1:27:09<03:26, 2.03it/s]
|
1103 |
96%|ββββββββββ| 10263/10682 [1:27:09<03:26, 2.03it/s]
|
1104 |
96%|ββββββββββ| 10264/10682 [1:27:10<03:26, 2.03it/s]
|
1105 |
96%|ββββββββββ| 10265/10682 [1:27:10<03:25, 2.03it/s]
|
1106 |
96%|ββββββββββ| 10266/10682 [1:27:11<03:24, 2.03it/s]
|
1107 |
96%|ββββββββββ| 10267/10682 [1:27:11<03:24, 2.03it/s]
|
1108 |
96%|ββββββββββ| 10268/10682 [1:27:12<03:24, 2.03it/s]
|
1109 |
96%|ββββββββββ| 10269/10682 [1:27:12<03:23, 2.03it/s]
|
1110 |
96%|ββββββββββ| 10270/10682 [1:27:13<03:23, 2.03it/s]
|
1111 |
96%|ββββββββββ| 10271/10682 [1:27:13<03:22, 2.03it/s]
|
1112 |
96%|ββββββββββ| 10272/10682 [1:27:14<03:22, 2.03it/s]
|
1113 |
96%|ββββββββββ| 10273/10682 [1:27:14<03:21, 2.03it/s]
|
1114 |
96%|ββββββββββ| 10274/10682 [1:27:15<03:20, 2.03it/s]
|
1115 |
96%|ββββββββββ| 10275/10682 [1:27:15<03:20, 2.03it/s]{'loss': 2.7872, 'grad_norm': 0.2615707516670227, 'learning_rate': 4.416419388921844e-06, 'epoch': 13.47}
|
1116 |
|
1117 |
+
|
1118 |
96%|ββββββββββ| 10275/10682 [1:27:15<03:20, 2.03it/s]
|
1119 |
96%|ββββββββββ| 10276/10682 [1:27:16<03:20, 2.03it/s]
|
1120 |
96%|ββββββββββ| 10277/10682 [1:27:16<03:19, 2.03it/s]
|
1121 |
96%|ββββββββββ| 10278/10682 [1:27:17<03:19, 2.03it/s]
|
1122 |
96%|ββββββββββ| 10279/10682 [1:27:17<03:18, 2.03it/s]
|
1123 |
96%|ββββββββββ| 10280/10682 [1:27:18<03:18, 2.03it/s]
|
1124 |
96%|ββββββββββ| 10281/10682 [1:27:18<03:17, 2.03it/s]
|
1125 |
96%|ββββββββββ| 10282/10682 [1:27:19<03:17, 2.03it/s]
|
1126 |
96%|ββββββββββ| 10283/10682 [1:27:19<03:16, 2.03it/s]
|
1127 |
96%|ββββββββββ| 10284/10682 [1:27:20<03:16, 2.03it/s]
|
1128 |
96%|ββββββββββ| 10285/10682 [1:27:20<03:15, 2.03it/s]
|
1129 |
96%|ββββββββββ| 10286/10682 [1:27:21<03:15, 2.03it/s]
|
1130 |
96%|ββββββββββ| 10287/10682 [1:27:21<03:14, 2.03it/s]
|
1131 |
96%|ββββββββββ| 10288/10682 [1:27:22<03:14, 2.03it/s]
|
1132 |
96%|ββββββββββ| 10289/10682 [1:27:22<03:13, 2.03it/s]
|
1133 |
96%|ββββββββββ| 10290/10682 [1:27:23<03:13, 2.03it/s]
|
1134 |
96%|ββββββββββ| 10291/10682 [1:27:23<03:12, 2.03it/s]
|
1135 |
96%|ββββββββββ| 10292/10682 [1:27:24<03:12, 2.03it/s]
|
1136 |
96%|ββββββββββ| 10293/10682 [1:27:24<03:11, 2.03it/s]
|
1137 |
96%|ββββββββββ| 10294/10682 [1:27:25<03:10, 2.03it/s]
|
1138 |
96%|ββββββββββ| 10295/10682 [1:27:25<03:10, 2.03it/s]
|
1139 |
96%|ββββββββββ| 10296/10682 [1:27:26<03:10, 2.03it/s]
|
1140 |
96%|ββββββββββ| 10297/10682 [1:27:26<03:09, 2.03it/s]
|
1141 |
96%|ββββββββββ| 10298/10682 [1:27:27<03:09, 2.03it/s]
|
1142 |
96%|ββββββββββ| 10299/10682 [1:27:27<03:08, 2.03it/s]
|
1143 |
96%|ββββββββββ| 10300/10682 [1:27:28<03:08, 2.03it/s]{'loss': 2.7753, 'grad_norm': 0.2637779116630554, 'learning_rate': 3.891208300917604e-06, 'epoch': 13.5}
|
1144 |
+
|
1145 |
|
1146 |
96%|ββββββββββ| 10300/10682 [1:27:28<03:08, 2.03it/s]
|
1147 |
96%|ββββββββββ| 10301/10682 [1:27:28<03:07, 2.03it/s]
|
1148 |
96%|ββββββββββ| 10302/10682 [1:27:29<03:07, 2.03it/s]
|
1149 |
96%|ββββββββββ| 10303/10682 [1:27:29<03:06, 2.03it/s]
|
1150 |
96%|ββββββββββ| 10304/10682 [1:27:29<03:06, 2.03it/s]
|
1151 |
96%|ββββββββββ| 10305/10682 [1:27:30<03:05, 2.03it/s]
|
1152 |
96%|ββββββββββ| 10306/10682 [1:27:30<03:05, 2.03it/s]
|
1153 |
96%|ββββββββββ| 10307/10682 [1:27:31<03:04, 2.03it/s]
|
1154 |
96%|ββββββββββ| 10308/10682 [1:27:31<03:04, 2.03it/s]
|
1155 |
97%|ββββββββββ| 10309/10682 [1:27:32<03:03, 2.03it/s]
|
1156 |
97%|βββββββββοΏ½οΏ½| 10310/10682 [1:27:32<03:03, 2.03it/s]
|
1157 |
97%|ββββββββββ| 10311/10682 [1:27:33<03:02, 2.03it/s]
|
1158 |
97%|ββββββββββ| 10312/10682 [1:27:33<03:02, 2.03it/s]
|
1159 |
97%|ββββββββββ| 10313/10682 [1:27:34<03:01, 2.03it/s]
|
1160 |
97%|ββββββββββ| 10314/10682 [1:27:34<03:01, 2.03it/s]
|
1161 |
97%|ββββββββββ| 10315/10682 [1:27:35<03:01, 2.03it/s]
|
1162 |
97%|ββββββββββ| 10316/10682 [1:27:35<03:00, 2.03it/s]
|
1163 |
97%|ββββββββββ| 10317/10682 [1:27:36<03:00, 2.03it/s]
|
1164 |
97%|ββββββββββ| 10318/10682 [1:27:36<02:59, 2.03it/s]
|
1165 |
97%|ββββββββββ| 10319/10682 [1:27:37<02:59, 2.02it/s]
|
1166 |
97%|ββββββββββ| 10320/10682 [1:27:37<02:58, 2.03it/s]
|
1167 |
97%|ββββββββββ| 10321/10682 [1:27:38<02:58, 2.03it/s]
|
1168 |
97%|ββββββββββ| 10322/10682 [1:27:38<02:57, 2.03it/s]
|
1169 |
97%|ββββββββββ| 10323/10682 [1:27:39<02:56, 2.03it/s]
|
1170 |
97%|ββββββββββ| 10324/10682 [1:27:39<02:56, 2.03it/s]
|
1171 |
97%|ββββββββββ| 10325/10682 [1:27:40<02:55, 2.03it/s]
|
1172 |
|
1173 |
+
|
1174 |
97%|ββββββββββ| 10325/10682 [1:27:40<02:55, 2.03it/s]
|
1175 |
97%|ββββββββββ| 10326/10682 [1:27:40<02:55, 2.03it/s]
|
1176 |
97%|ββββββββββ| 10327/10682 [1:27:41<02:55, 2.03it/s]
|
1177 |
97%|ββββββββββ| 10328/10682 [1:27:41<02:54, 2.03it/s]
|
1178 |
97%|ββββββββββ| 10329/10682 [1:27:42<02:54, 2.03it/s]
|
1179 |
97%|ββββββββββ| 10330/10682 [1:27:42<02:53, 2.03it/s]
|
1180 |
97%|ββββββββββ| 10331/10682 [1:27:43<02:52, 2.03it/s]
|
1181 |
97%|ββββββββββ| 10332/10682 [1:27:43<02:52, 2.03it/s]
|
1182 |
97%|ββββββββββ| 10333/10682 [1:27:44<02:51, 2.03it/s]
|
1183 |
97%|ββββββββββ| 10334/10682 [1:27:44<02:51, 2.03it/s]
|
1184 |
97%|ββββββββββ| 10335/10682 [1:27:45<02:50, 2.03it/s]
|
1185 |
97%|ββββββββββ| 10336/10682 [1:27:45<02:50, 2.03it/s]
|
1186 |
97%|ββββββββββ| 10337/10682 [1:27:46<02:50, 2.03it/s]
|
1187 |
97%|ββββββββββ| 10338/10682 [1:27:46<02:49, 2.03it/s]
|
1188 |
97%|ββββββββββ| 10339/10682 [1:27:47<02:49, 2.03it/s]
|
1189 |
97%|ββββββββββ| 10340/10682 [1:27:47<02:48, 2.03it/s]
|
1190 |
97%|ββββββββββ| 10341/10682 [1:27:48<02:47, 2.03it/s]
|
1191 |
97%|ββββββββββ| 10342/10682 [1:27:48<02:47, 2.03it/s]
|
1192 |
97%|ββββββββββ| 10343/10682 [1:27:49<02:46, 2.03it/s]
|
1193 |
97%|ββββββββββ| 10344/10682 [1:27:49<02:46, 2.03it/s]
|
1194 |
97%|ββββββββββ| 10345/10682 [1:27:50<02:45, 2.03it/s]
|
1195 |
97%|ββββββββββ| 10346/10682 [1:27:50<02:45, 2.03it/s]
|
1196 |
97%|ββββββββββ| 10347/10682 [1:27:51<02:45, 2.03it/s]
|
1197 |
97%|ββββββββββ| 10348/10682 [1:27:51<02:44, 2.03it/s]
|
1198 |
97%|ββββββββββ| 10349/10682 [1:27:52<02:44, 2.03it/s]
|
1199 |
97%|ββββββββββ| 10350/10682 [1:27:52<02:43, 2.03it/s]{'loss': 2.786, 'grad_norm': 0.2615926265716553, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
1200 |
+
|
1201 |
|
1202 |
97%|ββββββββββ| 10350/10682 [1:27:52<02:43, 2.03it/s]
|
1203 |
97%|ββββββββββ| 10351/10682 [1:27:53<02:43, 2.02it/s]
|
1204 |
97%|ββββββββββ| 10352/10682 [1:27:53<02:43, 2.02it/s]
|
1205 |
97%|ββββββββββ| 10353/10682 [1:27:54<02:42, 2.02it/s]
|
1206 |
97%|ββββββββββ| 10354/10682 [1:27:54<02:41, 2.03it/s]
|
1207 |
97%|ββββββββββ| 10355/10682 [1:27:55<02:41, 2.03it/s]
|
1208 |
97%|ββββββββββ| 10356/10682 [1:27:55<02:40, 2.03it/s]
|
1209 |
97%|ββββββββββ| 10357/10682 [1:27:56<02:40, 2.03it/s]
|
1210 |
97%|ββββββββββ| 10358/10682 [1:27:56<02:39, 2.03it/s]
|
1211 |
97%|ββββββββββ| 10359/10682 [1:27:57<02:39, 2.03it/s]
|
1212 |
97%|ββββββββββ| 10360/10682 [1:27:57<02:38, 2.03it/s]
|
1213 |
97%|ββββββββββ| 10361/10682 [1:27:58<02:38, 2.03it/s]
|
1214 |
97%|ββββββββββ| 10362/10682 [1:27:58<02:37, 2.03it/s]
|
1215 |
97%|ββββββββββ| 10363/10682 [1:27:59<02:37, 2.03it/s]
|
1216 |
97%|ββββββββββ| 10364/10682 [1:27:59<02:36, 2.03it/s]
|
1217 |
97%|ββββββββββ| 10365/10682 [1:28:00<02:36, 2.03it/s]
|
1218 |
97%|ββββββββββ| 10366/10682 [1:28:00<02:35, 2.03it/s]
|
1219 |
97%|ββββββββββ| 10367/10682 [1:28:01<02:35, 2.03it/s]
|
1220 |
97%|ββββββββββ| 10368/10682 [1:28:01<02:34, 2.03it/s]
|
1221 |
97%|ββββββββββ| 10369/10682 [1:28:02<02:34, 2.03it/s]
|
1222 |
97%|ββββββββββ| 10370/10682 [1:28:02<02:33, 2.03it/s]
|
1223 |
97%|ββββββββββ| 10371/10682 [1:28:03<02:33, 2.03it/s]
|
1224 |
97%|ββββββββββ| 10372/10682 [1:28:03<02:32, 2.03it/s]
|
1225 |
97%|ββββββββββ| 10373/10682 [1:28:04<02:32, 2.03it/s]
|
1226 |
97%|ββββββββββ| 10374/10682 [1:28:04<02:31, 2.03it/s]
|
1227 |
97%|ββββββββββ| 10375/10682 [1:28:04<02:31, 2.03it/s]{'loss': 2.7851, 'grad_norm': 0.26228195428848267, 'learning_rate': 2.5143995351817882e-06, 'epoch': 13.6}
|
1228 |
+
|
1229 |
|
1230 |
97%|ββββββββββ| 10375/10682 [1:28:04<02:31, 2.03it/s]
|
1231 |
97%|ββββββββββ| 10376/10682 [1:28:05<02:31, 2.02it/s]
|
1232 |
97%|ββββββββββ| 10377/10682 [1:28:05<02:30, 2.02it/s]
|
1233 |
97%|ββββββββββ| 10378/10682 [1:28:06<02:30, 2.02it/s]
|
1234 |
97%|ββββββββββ| 10379/10682 [1:28:06<02:29, 2.03it/s]
|
1235 |
97%|ββββββββββ| 10380/10682 [1:28:07<02:29, 2.03it/s]
|
1236 |
97%|ββββββββββ| 10381/10682 [1:28:07<02:28, 2.03it/s]
|
1237 |
97%|ββββββββββ| 10382/10682 [1:28:08<02:27, 2.03it/s]
|
1238 |
97%|ββββββββββ| 10383/10682 [1:28:08<02:27, 2.03it/s]
|
1239 |
97%|ββββββββββ| 10384/10682 [1:28:09<02:26, 2.03it/s]
|
1240 |
97%|ββββββββββ| 10385/10682 [1:28:09<02:26, 2.03it/s]
|
1241 |
97%|ββββββββββ| 10386/10682 [1:28:10<02:25, 2.03it/s]
|
1242 |
97%|ββββββββββ| 10387/10682 [1:28:10<02:25, 2.03it/s]
|
1243 |
97%|ββββββββββ| 10388/10682 [1:28:11<02:24, 2.03it/s]
|
1244 |
97%|ββββββββββ| 10389/10682 [1:28:11<02:24, 2.03it/s]
|
1245 |
97%|ββββββββββ| 10390/10682 [1:28:12<02:23, 2.03it/s]
|
1246 |
97%|ββββββββββ| 10391/10682 [1:28:12<02:23, 2.03it/s]
|
1247 |
97%|ββββββββββ| 10392/10682 [1:28:13<02:23, 2.03it/s]
|
1248 |
97%|ββββββββββ| 10393/10682 [1:28:13<02:22, 2.03it/s]
|
1249 |
97%|ββββββββββ| 10394/10682 [1:28:14<02:21, 2.03it/s]
|
1250 |
97%|ββββββββββ| 10395/10682 [1:28:14<02:21, 2.03it/s]
|
1251 |
97%|ββββββββββ| 10396/10682 [1:28:15<02:20, 2.03it/s]
|
1252 |
97%|ββββββββββ| 10397/10682 [1:28:15<02:20, 2.03it/s]
|
1253 |
97%|ββββββββββ| 10398/10682 [1:28:16<02:19, 2.03it/s]
|
1254 |
97%|ββββββββββ| 10399/10682 [1:28:16<02:19, 2.03it/s]
|
1255 |
97%|ββββββββββ| 10400/10682 [1:28:17<02:18, 2.03it/s]
|
1256 |
|
1257 |
+
|
1258 |
97%|ββββββββββ| 10400/10682 [1:28:17<02:18, 2.03it/s]
|
1259 |
97%|ββββββββββ| 10401/10682 [1:28:17<02:18, 2.03it/s]
|
1260 |
97%|ββββββββββ| 10402/10682 [1:28:18<02:18, 2.03it/s]
|
1261 |
97%|ββββββββββ| 10403/10682 [1:28:18<02:17, 2.03it/s]
|
1262 |
97%|ββββββββββ| 10404/10682 [1:28:19<02:17, 2.03it/s]
|
1263 |
97%|ββββββββββ| 10405/10682 [1:28:19<02:16, 2.03it/s]
|
1264 |
97%|ββββββββββ| 10406/10682 [1:28:20<02:15, 2.03it/s]
|
1265 |
97%|ββββββββββ| 10407/10682 [1:28:20<02:15, 2.03it/s]
|
1266 |
97%|ββββββββββ| 10408/10682 [1:28:21<02:14, 2.03it/s]
|
1267 |
97%|ββββββββββ| 10409/10682 [1:28:21<02:14, 2.03it/s]
|
1268 |
97%|ββββββββββ| 10410/10682 [1:28:22<02:14, 2.03it/s]
|
1269 |
97%|ββββββββββ| 10411/10682 [1:28:22<02:13, 2.03it/s]
|
1270 |
97%|ββββββββββ| 10412/10682 [1:28:23<02:13, 2.03it/s]
|
1271 |
97%|ββββββββββ| 10413/10682 [1:28:23<02:12, 2.03it/s]
|
1272 |
97%|ββββββββββ| 10414/10682 [1:28:24<02:12, 2.03it/s]
|
1273 |
98%|ββββββββββ| 10415/10682 [1:28:24<02:11, 2.03it/s]
|
1274 |
98%|ββββββββββ| 10416/10682 [1:28:25<02:11, 2.02it/s]
|
1275 |
98%|ββββββββββ| 10417/10682 [1:28:25<02:10, 2.03it/s]
|
1276 |
98%|ββββββββββ| 10418/10682 [1:28:26<02:10, 2.03it/s]
|
1277 |
98%|ββββββββββ| 10419/10682 [1:28:26<02:09, 2.03it/s]
|
1278 |
98%|ββββββββββ| 10420/10682 [1:28:27<02:09, 2.03it/s]
|
1279 |
98%|ββββββββββ| 10421/10682 [1:28:27<02:08, 2.03it/s]
|
1280 |
98%|ββββββββββ| 10422/10682 [1:28:28<02:08, 2.03it/s]
|
1281 |
98%|ββββββββββ| 10423/10682 [1:28:28<02:07, 2.03it/s]
|
1282 |
98%|ββββββββββ| 10424/10682 [1:28:29<02:07, 2.03it/s]
|
1283 |
98%|ββββββββββ| 10425/10682 [1:28:29<02:06, 2.03it/s]
|
1284 |
|
1285 |
+
|
1286 |
98%|ββββββββββ| 10425/10682 [1:28:29<02:06, 2.03it/s]
|
1287 |
98%|ββββββββββ| 10426/10682 [1:28:30<02:06, 2.03it/s]
|
1288 |
98%|ββββββββββ| 10427/10682 [1:28:30<02:05, 2.03it/s]
|
1289 |
98%|ββββββββββ| 10428/10682 [1:28:31<02:05, 2.03it/s]
|
1290 |
98%|βββββββοΏ½οΏ½οΏ½ββ| 10429/10682 [1:28:31<02:04, 2.03it/s]
|
1291 |
98%|ββββββββββ| 10430/10682 [1:28:32<02:04, 2.03it/s]
|
1292 |
98%|ββββββββββ| 10431/10682 [1:28:32<02:03, 2.03it/s]
|
1293 |
98%|ββββββββββ| 10432/10682 [1:28:33<02:03, 2.03it/s]
|
1294 |
98%|ββββββββββ| 10433/10682 [1:28:33<02:02, 2.03it/s]
|
1295 |
98%|ββββββββββ| 10434/10682 [1:28:34<02:02, 2.03it/s]
|
1296 |
98%|ββββββββββ| 10435/10682 [1:28:34<02:01, 2.03it/s]
|
1297 |
98%|ββββββββββ| 10436/10682 [1:28:35<02:01, 2.03it/s]
|
1298 |
98%|ββββββββββ| 10437/10682 [1:28:35<02:00, 2.03it/s]
|
1299 |
98%|ββββββββββ| 10438/10682 [1:28:36<02:00, 2.03it/s]
|
1300 |
98%|ββββββββββ| 10439/10682 [1:28:36<01:59, 2.03it/s]
|
1301 |
98%|ββββββββββ| 10440/10682 [1:28:37<01:59, 2.03it/s]
|
1302 |
98%|ββββββββββ| 10441/10682 [1:28:37<01:58, 2.03it/s]
|
1303 |
98%|ββββββββββ| 10442/10682 [1:28:38<01:58, 2.03it/s]
|
1304 |
98%|ββββββββββ| 10443/10682 [1:28:38<01:57, 2.03it/s]
|
1305 |
98%|ββββββββββ| 10444/10682 [1:28:38<01:57, 2.03it/s]
|
1306 |
98%|ββββββββββ| 10445/10682 [1:28:39<01:56, 2.03it/s]
|
1307 |
98%|ββββββββββ| 10446/10682 [1:28:39<01:56, 2.03it/s]
|
1308 |
98%|ββββββββββ| 10447/10682 [1:28:40<01:55, 2.03it/s]
|
1309 |
98%|ββββββββββ| 10448/10682 [1:28:40<01:55, 2.03it/s]
|
1310 |
98%|ββββββββββ| 10449/10682 [1:28:41<01:54, 2.03it/s]
|
1311 |
98%|ββββββββββ| 10450/10682 [1:28:41<01:54, 2.03it/s]
|
1312 |
|
1313 |
+
|
1314 |
98%|ββββββββββ| 10450/10682 [1:28:41<01:54, 2.03it/s]
|
1315 |
98%|ββββββββββ| 10451/10682 [1:28:42<01:53, 2.03it/s]
|
1316 |
98%|ββββββββββ| 10452/10682 [1:28:42<01:53, 2.03it/s]
|
1317 |
98%|ββββββββββ| 10453/10682 [1:28:43<01:52, 2.03it/s]
|
1318 |
98%|ββββββββββ| 10454/10682 [1:28:43<01:52, 2.03it/s]
|
1319 |
98%|ββββββββββ| 10455/10682 [1:28:44<01:51, 2.03it/s]
|
1320 |
98%|ββββββββββ| 10456/10682 [1:28:44<01:51, 2.03it/s]
|
1321 |
98%|ββββββββββ| 10457/10682 [1:28:45<01:50, 2.03it/s]
|
1322 |
98%|ββββββββββ| 10458/10682 [1:28:45<01:50, 2.03it/s]
|
1323 |
98%|ββββββββββ| 10459/10682 [1:28:46<01:49, 2.03it/s]
|
1324 |
98%|ββββββββββ| 10460/10682 [1:28:46<01:49, 2.03it/s]
|
1325 |
98%|ββββββββββ| 10461/10682 [1:28:47<01:48, 2.03it/s]
|
1326 |
98%|ββββββββββ| 10462/10682 [1:28:47<01:48, 2.03it/s]
|
1327 |
98%|ββββββββββ| 10463/10682 [1:28:48<01:47, 2.03it/s]
|
1328 |
98%|ββββββββββ| 10464/10682 [1:28:48<01:47, 2.03it/s]
|
1329 |
98%|ββββββββββ| 10465/10682 [1:28:49<01:47, 2.03it/s]
|
1330 |
98%|ββββββββββ| 10466/10682 [1:28:49<01:46, 2.03it/s]
|
1331 |
98%|ββββββββββ| 10467/10682 [1:28:50<01:46, 2.03it/s]
|
1332 |
98%|ββββββββββ| 10468/10682 [1:28:50<01:45, 2.03it/s]
|
1333 |
98%|ββββββββββ| 10469/10682 [1:28:51<01:44, 2.03it/s]
|
1334 |
98%|ββββββββββ| 10470/10682 [1:28:51<01:44, 2.03it/s]
|
1335 |
98%|ββββββββββ| 10471/10682 [1:28:52<01:43, 2.03it/s]
|
1336 |
98%|ββββββββββ| 10472/10682 [1:28:52<01:43, 2.03it/s]
|
1337 |
98%|ββββββββββ| 10473/10682 [1:28:53<01:42, 2.03it/s]
|
1338 |
98%|ββββββββββ| 10474/10682 [1:28:53<01:42, 2.03it/s]
|
1339 |
98%|ββββββββββ| 10475/10682 [1:28:54<01:42, 2.03it/s]{'loss': 2.7933, 'grad_norm': 0.26085323095321655, 'learning_rate': 1.143659993153079e-06, 'epoch': 13.73}
|
1340 |
+
|
1341 |
|
1342 |
98%|ββββββββββ| 10475/10682 [1:28:54<01:42, 2.03it/s]
|
1343 |
98%|ββββββββββ| 10476/10682 [1:28:54<01:41, 2.03it/s]
|
1344 |
98%|ββββββββββ| 10477/10682 [1:28:55<01:41, 2.03it/s]
|
1345 |
98%|ββββββββββ| 10478/10682 [1:28:55<01:40, 2.03it/s]
|
1346 |
98%|ββββββββββ| 10479/10682 [1:28:56<01:40, 2.03it/s]
|
1347 |
98%|ββββββββββ| 10480/10682 [1:28:56<01:39, 2.03it/s]
|
1348 |
98%|ββββββββββ| 10481/10682 [1:28:57<01:38, 2.03it/s]
|
1349 |
98%|ββββββββββ| 10482/10682 [1:28:57<01:38, 2.03it/s]
|
1350 |
98%|ββββββββββ| 10483/10682 [1:28:58<01:37, 2.03it/s]
|
1351 |
98%|ββββββββββ| 10484/10682 [1:28:58<01:37, 2.03it/s]
|
1352 |
98%|ββββββββββ| 10485/10682 [1:28:59<01:37, 2.03it/s]
|
1353 |
98%|ββββββββββ| 10486/10682 [1:28:59<01:36, 2.03it/s]
|
1354 |
98%|ββββββββββ| 10487/10682 [1:29:00<01:36, 2.03it/s]
|
1355 |
98%|ββββββββββ| 10488/10682 [1:29:00<01:35, 2.03it/s]
|
1356 |
98%|ββββββββββ| 10489/10682 [1:29:01<01:35, 2.03it/s]
|
1357 |
98%|ββββββββοΏ½οΏ½β| 10490/10682 [1:29:01<01:34, 2.03it/s]
|
1358 |
98%|ββββββββββ| 10491/10682 [1:29:02<01:34, 2.03it/s]
|
1359 |
98%|ββββββββββ| 10492/10682 [1:29:02<01:33, 2.03it/s]
|
1360 |
98%|ββββββββββ| 10493/10682 [1:29:03<01:33, 2.03it/s]
|
1361 |
98%|ββββββββββ| 10494/10682 [1:29:03<01:32, 2.03it/s]
|
1362 |
98%|ββββββββββ| 10495/10682 [1:29:04<01:32, 2.03it/s]
|
1363 |
98%|ββββββββββ| 10496/10682 [1:29:04<01:31, 2.03it/s]
|
1364 |
98%|ββββββββββ| 10497/10682 [1:29:05<01:31, 2.03it/s]
|
1365 |
98%|ββββββββββ| 10498/10682 [1:29:05<01:30, 2.03it/s]
|
1366 |
98%|ββββββββββ| 10499/10682 [1:29:06<01:30, 2.03it/s]
|
1367 |
98%|ββββββββββ| 10500/10682 [1:29:06<01:29, 2.03it/s]{'loss': 2.779, 'grad_norm': 0.2600437104701996, 'learning_rate': 8.841716933915555e-07, 'epoch': 13.76}
|
1368 |
|
1369 |
+
|
1370 |
98%|ββββββββββ| 10500/10682 [1:29:06<01:29, 2.03it/s]
|
1371 |
98%|ββββββββββ| 10501/10682 [1:29:07<01:29, 2.02it/s]
|
1372 |
98%|ββββββββββ| 10502/10682 [1:29:07<01:28, 2.03it/s]
|
1373 |
98%|ββββββββββ| 10503/10682 [1:29:08<01:28, 2.02it/s]
|
1374 |
98%|ββββββββββ| 10504/10682 [1:29:08<01:27, 2.02it/s]
|
1375 |
98%|ββββββββββ| 10505/10682 [1:29:09<01:27, 2.03it/s]
|
1376 |
98%|ββββββββββ| 10506/10682 [1:29:09<01:26, 2.03it/s]
|
1377 |
98%|ββββββββββ| 10507/10682 [1:29:10<01:26, 2.03it/s]
|
1378 |
98%|ββββββββββ| 10508/10682 [1:29:10<01:25, 2.03it/s]
|
1379 |
98%|ββββββββββ| 10509/10682 [1:29:11<01:25, 2.03it/s]
|
1380 |
98%|ββββββββββ| 10510/10682 [1:29:11<01:24, 2.03it/s]
|
1381 |
98%|ββββββββββ| 10511/10682 [1:29:12<01:24, 2.03it/s]
|
1382 |
98%|ββββββββββ| 10512/10682 [1:29:12<01:23, 2.03it/s]
|
1383 |
98%|ββββββββββ| 10513/10682 [1:29:13<01:23, 2.03it/s]
|
1384 |
98%|ββββββββββ| 10514/10682 [1:29:13<01:22, 2.03it/s]
|
1385 |
98%|ββββββββββ| 10515/10682 [1:29:13<01:22, 2.03it/s]
|
1386 |
98%|ββββββββββ| 10516/10682 [1:29:14<01:21, 2.03it/s]
|
1387 |
98%|ββββββββββ| 10517/10682 [1:29:14<01:21, 2.03it/s]
|
1388 |
98%|ββββββββββ| 10518/10682 [1:29:15<01:20, 2.03it/s]
|
1389 |
98%|ββββββββββ| 10519/10682 [1:29:15<01:20, 2.03it/s]
|
1390 |
98%|ββββββββββ| 10520/10682 [1:29:16<01:19, 2.03it/s]
|
1391 |
98%|ββββββββββ| 10521/10682 [1:29:16<01:19, 2.03it/s]
|
1392 |
99%|ββββββββββ| 10522/10682 [1:29:17<01:18, 2.03it/s]
|
1393 |
99%|ββββββββββ| 10523/10682 [1:29:17<01:18, 2.03it/s]
|
1394 |
99%|ββββββββββ| 10524/10682 [1:29:18<01:17, 2.03it/s]
|
1395 |
99%|ββββββββββ| 10525/10682 [1:29:18<01:17, 2.03it/s]
|
1396 |
|
1397 |
+
|
1398 |
99%|ββββββββββ| 10525/10682 [1:29:18<01:17, 2.03it/s]
|
1399 |
99%|ββββββββββ| 10526/10682 [1:29:19<01:16, 2.03it/s]
|
1400 |
99%|ββββββββββ| 10527/10682 [1:29:19<01:16, 2.03it/s]
|
1401 |
99%|ββββββββββ| 10528/10682 [1:29:20<01:15, 2.03it/s]
|
1402 |
99%|ββββββββββ| 10529/10682 [1:29:20<01:15, 2.03it/s]
|
1403 |
99%|ββββββββββ| 10530/10682 [1:29:21<01:14, 2.03it/s]
|
1404 |
99%|ββββββββββ| 10531/10682 [1:29:21<01:14, 2.03it/s]
|
1405 |
99%|ββββββββββ| 10532/10682 [1:29:22<01:13, 2.03it/s]
|
1406 |
99%|ββββββββββ| 10533/10682 [1:29:22<01:13, 2.03it/s]
|
1407 |
99%|ββββββββββ| 10534/10682 [1:29:23<01:12, 2.03it/s]
|
1408 |
99%|ββββββββββ| 10535/10682 [1:29:23<01:12, 2.03it/s]
|
1409 |
99%|ββββββββββ| 10536/10682 [1:29:24<01:11, 2.03it/s]
|
1410 |
99%|ββββββββββ| 10537/10682 [1:29:24<01:11, 2.03it/s]
|
1411 |
99%|ββββββββββ| 10538/10682 [1:29:25<01:10, 2.03it/s]
|
1412 |
99%|ββββββββββ| 10539/10682 [1:29:25<01:10, 2.03it/s]
|
1413 |
99%|ββββββββββ| 10540/10682 [1:29:26<01:09, 2.03it/s]
|
1414 |
99%|ββββββββββ| 10541/10682 [1:29:26<01:09, 2.03it/s]
|
1415 |
99%|ββββββββββ| 10542/10682 [1:29:27<01:08, 2.03it/s]
|
1416 |
99%|ββββββββββ| 10543/10682 [1:29:27<01:08, 2.03it/s]
|
1417 |
99%|ββββββββββ| 10544/10682 [1:29:28<01:08, 2.03it/s]
|
1418 |
99%|ββββββββββ| 10545/10682 [1:29:28<01:07, 2.03it/s]
|
1419 |
99%|ββββββββββ| 10546/10682 [1:29:29<01:07, 2.03it/s]
|
1420 |
99%|ββββββββββ| 10547/10682 [1:29:29<01:06, 2.03it/s]
|
1421 |
99%|ββββββββββ| 10548/10682 [1:29:30<01:05, 2.03it/s]
|
1422 |
99%|ββββββββββ| 10549/10682 [1:29:30<01:05, 2.03it/s]
|
1423 |
99%|ββββββββββ| 10550/10682 [1:29:31<01:04, 2.03it/s]{'loss': 2.7851, 'grad_norm': 0.26254361867904663, 'learning_rate': 4.651600211027507e-07, 'epoch': 13.83}
|
1424 |
|
1425 |
+
|
1426 |
99%|ββββββββββ| 10550/10682 [1:29:31<01:04, 2.03it/s]
|
1427 |
99%|ββββββββββ| 10551/10682 [1:29:31<01:04, 2.03it/s]
|
1428 |
99%|ββββββββββ| 10552/10682 [1:29:32<01:04, 2.03it/s]
|
1429 |
99%|ββββββββββ| 10553/10682 [1:29:32<01:03, 2.03it/s]
|
1430 |
99%|ββββββββββ| 10554/10682 [1:29:33<01:03, 2.03it/s]
|
1431 |
99%|ββββββββββ| 10555/10682 [1:29:33<01:02, 2.03it/s]
|
1432 |
99%|ββββββββββ| 10556/10682 [1:29:34<01:02, 2.03it/s]
|
1433 |
99%|ββββββββββ| 10557/10682 [1:29:34<01:01, 2.03it/s]
|
1434 |
99%|ββββββββββ| 10558/10682 [1:29:35<01:01, 2.03it/s]
|
1435 |
99%|ββββββββββ| 10559/10682 [1:29:35<01:00, 2.03it/s]
|
1436 |
99%|ββββββββββ| 10560/10682 [1:29:36<01:00, 2.03it/s]
|
1437 |
99%|ββββββββββ| 10561/10682 [1:29:36<00:59, 2.03it/s]
|
1438 |
99%|ββββββββββ| 10562/10682 [1:29:37<00:59, 2.03it/s]
|
1439 |
99%|ββββββββββ| 10563/10682 [1:29:37<00:58, 2.03it/s]
|
1440 |
99%|ββββββββββ| 10564/10682 [1:29:38<00:58, 2.03it/s]
|
1441 |
99%|ββββββββββ| 10565/10682 [1:29:38<00:57, 2.03it/s]
|
1442 |
99%|ββββββββββ| 10566/10682 [1:29:39<00:57, 2.03it/s]
|
1443 |
99%|ββββββββββ| 10567/10682 [1:29:39<00:56, 2.03it/s]
|
1444 |
99%|ββββββββββ| 10568/10682 [1:29:40<00:56, 2.03it/s]
|
1445 |
99%|ββββββββββ| 10569/10682 [1:29:40<00:55, 2.03it/s]
|
1446 |
99%|ββββββββββ| 10570/10682 [1:29:41<00:55, 2.03it/s]
|
1447 |
99%|ββββββββββ| 10571/10682 [1:29:41<00:54, 2.03it/s]
|
1448 |
99%|ββββββββββ| 10572/10682 [1:29:42<00:54, 2.03it/s]
|
1449 |
99%|ββββββββββ| 10573/10682 [1:29:42<00:53, 2.03it/s]
|
1450 |
99%|ββββββββββ| 10574/10682 [1:29:43<00:53, 2.03it/s]
|
1451 |
99%|ββββββββββ| 10575/10682 [1:29:43<00:52, 2.03it/s]
|
1452 |
{'loss': 2.7812, 'grad_norm': 0.2619117498397827, 'learning_rate': 3.0566461813213986e-07, 'epoch': 13.86}
|
1453 |
+
|
1454 |
99%|ββββββββββ| 10575/10682 [1:29:43<00:52, 2.03it/s]
|
1455 |
99%|ββββββββββ| 10576/10682 [1:29:44<00:52, 2.03it/s]
|
1456 |
99%|ββββββββββ| 10577/10682 [1:29:44<00:51, 2.03it/s]
|
1457 |
99%|ββββββββββ| 10578/10682 [1:29:45<00:51, 2.03it/s]
|
1458 |
99%|ββββββββββ| 10579/10682 [1:29:45<00:50, 2.03it/s]
|
1459 |
99%|ββββββββββ| 10580/10682 [1:29:46<00:50, 2.03it/s]
|
1460 |
99%|ββββββββββ| 10581/10682 [1:29:46<00:49, 2.03it/s]
|
1461 |
99%|ββββββββββ| 10582/10682 [1:29:46<00:49, 2.03it/s]
|
1462 |
99%|ββββββββββ| 10583/10682 [1:29:47<00:48, 2.03it/s]
|
1463 |
99%|ββββββββββ| 10584/10682 [1:29:47<00:48, 2.03it/s]
|
1464 |
99%|ββββββββββ| 10585/10682 [1:29:48<00:47, 2.03it/s]
|
1465 |
99%|ββββββββββ| 10586/10682 [1:29:48<00:47, 2.03it/s]
|
1466 |
99%|ββββββββββ| 10587/10682 [1:29:49<00:46, 2.03it/s]
|
1467 |
99%|ββββββββββ| 10588/10682 [1:29:49<00:46, 2.03it/s]
|
1468 |
99%|ββββββββββ| 10589/10682 [1:29:50<00:45, 2.03it/s]
|
1469 |
99%|ββββββββββ| 10590/10682 [1:29:50<00:45, 2.03it/s]
|
1470 |
99%|ββββββββββ| 10591/10682 [1:29:51<00:44, 2.03it/s]
|
1471 |
99%|ββββββββββ| 10592/10682 [1:29:51<00:44, 2.03it/s]
|
1472 |
99%|ββββββββββ| 10593/10682 [1:29:52<00:43, 2.03it/s]
|
1473 |
99%|ββββββββββ| 10594/10682 [1:29:52<00:43, 2.03it/s]
|
1474 |
99%|ββββββββββ| 10595/10682 [1:29:53<00:42, 2.03it/s]
|
1475 |
99%|ββββββββββ| 10596/10682 [1:29:53<00:42, 2.03it/s]
|
1476 |
99%|ββββββββββ| 10597/10682 [1:29:54<00:41, 2.03it/s]
|
1477 |
99%|ββββββββββ| 10598/10682 [1:29:54<00:41, 2.03it/s]
|
1478 |
99%|ββββββββββ| 10599/10682 [1:29:55<00:40, 2.03it/s]
|
1479 |
99%|ββββββββββ| 10600/10682 [1:29:55<00:40, 2.03it/s]
|
1480 |
{'loss': 2.7727, 'grad_norm': 0.2640470564365387, 'learning_rate': 1.7952444123359167e-07, 'epoch': 13.89}
|
1481 |
+
|
1482 |
99%|ββββββββββ| 10600/10682 [1:29:55<00:40, 2.03it/s]
|
1483 |
99%|ββββββββββ| 10601/10682 [1:29:56<00:40, 2.02it/s]
|
1484 |
99%|ββββββββββ| 10602/10682 [1:29:56<00:39, 2.03it/s]
|
1485 |
99%|ββββββββββ| 10603/10682 [1:29:57<00:39, 2.02it/s]
|
1486 |
99%|ββββββββββ| 10604/10682 [1:29:57<00:38, 2.03it/s]
|
1487 |
99%|ββββββββββ| 10605/10682 [1:29:58<00:37, 2.03it/s]
|
1488 |
99%|ββββββββββ| 10606/10682 [1:29:58<00:37, 2.03it/s]
|
1489 |
99%|ββββββββββ| 10607/10682 [1:29:59<00:36, 2.03it/s]
|
1490 |
99%|ββββββββββ| 10608/10682 [1:29:59<00:36, 2.03it/s]
|
1491 |
99%|ββββββοΏ½οΏ½οΏ½βββ| 10609/10682 [1:30:00<00:35, 2.03it/s]
|
1492 |
99%|ββββββββββ| 10610/10682 [1:30:00<00:35, 2.03it/s]
|
1493 |
99%|ββββββββββ| 10611/10682 [1:30:01<00:34, 2.03it/s]
|
1494 |
99%|ββββββββββ| 10612/10682 [1:30:01<00:34, 2.03it/s]
|
1495 |
99%|ββββββββββ| 10613/10682 [1:30:02<00:34, 2.03it/s]
|
1496 |
99%|ββββββββββ| 10614/10682 [1:30:02<00:33, 2.03it/s]
|
1497 |
99%|ββββββββββ| 10615/10682 [1:30:03<00:33, 2.03it/s]
|
1498 |
99%|ββββββββββ| 10616/10682 [1:30:03<00:32, 2.03it/s]
|
1499 |
99%|ββββββββββ| 10617/10682 [1:30:04<00:32, 2.03it/s]
|
1500 |
99%|ββββββββββ| 10618/10682 [1:30:04<00:31, 2.03it/s]
|
1501 |
99%|ββββββββββ| 10619/10682 [1:30:05<00:31, 2.03it/s]
|
1502 |
99%|ββββββββββ| 10620/10682 [1:30:05<00:30, 2.03it/s]
|
1503 |
99%|ββββββββββ| 10621/10682 [1:30:06<00:30, 2.03it/s]
|
1504 |
99%|ββββββββββ| 10622/10682 [1:30:06<00:29, 2.03it/s]
|
1505 |
99%|ββββββββββ| 10623/10682 [1:30:07<00:29, 2.03it/s]
|
1506 |
99%|ββββββββββ| 10624/10682 [1:30:07<00:28, 2.03it/s]
|
1507 |
99%|ββββββββββ| 10625/10682 [1:30:08<00:28, 2.03it/s]
|
1508 |
{'loss': 2.7884, 'grad_norm': 0.2641913592815399, 'learning_rate': 8.674791042273533e-08, 'epoch': 13.93}
|
1509 |
+
|
1510 |
99%|ββββββββββ| 10625/10682 [1:30:08<00:28, 2.03it/s]
|
1511 |
99%|ββββββββββ| 10626/10682 [1:30:08<00:27, 2.03it/s]
|
1512 |
99%|ββββββββββ| 10627/10682 [1:30:09<00:27, 2.03it/s]
|
1513 |
99%|ββββββββββ| 10628/10682 [1:30:09<00:26, 2.03it/s]
|
1514 |
|
1515 |
+
|
1516 |
{'loss': 2.7799, 'grad_norm': 0.26377299427986145, 'learning_rate': 1.3083313863404555e-09, 'epoch': 13.99}
|
1517 |
+
|
1518 |
+
|
1519 |
|
1520 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|