Training in progress, epoch 12
Browse files- logs/events.out.tfevents.1716829581.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +32 -1
logs/events.out.tfevents.1716829581.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:746138e2c2b6d5b5db1e63d77ed9640b02a0a1d4bf153cf4c989586d6832b8f2
|
3 |
+
size 88531
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d250e12349978270db673e201e2ae15722acb41c1fe39adefb2be0d0587306ef
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -555,4 +555,35 @@ command outputs:
|
|
555 |
|
556 |
|
557 |
85%|βββββββββ | 9100/10682 [1:17:44<12:59, 2.03it/s]
|
558 |
85%|βββββββββ | 9101/10682 [1:17:45<13:00, 2.02it/s]
|
559 |
85%|βββββββββ | 9102/10682 [1:17:45<12:59, 2.03it/s]
|
560 |
85%|βββββββββ | 9103/10682 [1:17:46<12:58, 2.03it/s]
|
561 |
85%|βββββββββ | 9104/10682 [1:17:46<12:59, 2.03it/s]
|
562 |
85%|βββββββββ | 9105/10682 [1:17:47<12:57, 2.03it/s]
|
563 |
85%|βββββββββ | 9106/10682 [1:17:47<12:57, 2.03it/s]
|
564 |
85%|βββββββββ | 9107/10682 [1:17:48<12:56, 2.03it/s]
|
565 |
85%|βββββββββ | 9108/10682 [1:17:48<12:56, 2.03it/s]
|
566 |
85%|βββββββββ | 9109/10682 [1:17:49<12:55, 2.03it/s]
|
567 |
85%|βββββββββ | 9110/10682 [1:17:49<12:54, 2.03it/s]
|
568 |
85%|βββββββββ | 9111/10682 [1:17:50<12:55, 2.03it/s]
|
569 |
85%|βββββββββ | 9112/10682 [1:17:50<12:53, 2.03it/s]
|
570 |
85%|βββββββββ | 9113/10682 [1:17:51<12:53, 2.03it/s]
|
571 |
85%|βββββββββ | 9114/10682 [1:17:51<12:52, 2.03it/s]
|
572 |
85%|βββββββββ | 9115/10682 [1:17:52<12:51, 2.03it/s]
|
573 |
85%|βββββββββ | 9116/10682 [1:17:52<12:52, 2.03it/s]
|
574 |
85%|βββββββββ | 9117/10682 [1:17:53<12:51, 2.03it/s]
|
575 |
85%|βββββββββ | 9118/10682 [1:17:53<12:51, 2.03it/s]
|
576 |
85%|βββββββββ | 9119/10682 [1:17:54<12:50, 2.03it/s]
|
577 |
85%|βββββββββ | 9120/10682 [1:17:54<12:51, 2.03it/s]
|
578 |
85%|βββββββββ | 9121/10682 [1:17:55<12:50, 2.03it/s]
|
579 |
85%|βββββββββ | 9122/10682 [1:17:55<12:50, 2.02it/s]
|
580 |
85%|βββββββββ | 9123/10682 [1:17:55<12:49, 2.02it/s]
|
581 |
85%|βββββββββ | 9124/10682 [1:17:56<12:49, 2.03it/s]
|
582 |
85%|βββββββββ | 9125/10682 [1:17:56<12:49, 2.02it/s]{'loss': 2.6917, 'grad_norm': 0.26624444127082825, 'learning_rate': 6.334441157888504e-05, 'epoch': 11.95}
|
583 |
|
584 |
|
585 |
85%|βββββββββ | 9125/10682 [1:17:56<12:49, 2.02it/s]
|
586 |
85%|βββββββββ | 9126/10682 [1:17:57<12:49, 2.02it/s]
|
587 |
85%|βββββββββ | 9127/10682 [1:17:57<12:48, 2.02it/s]
|
588 |
85%|βββββββββ | 9128/10682 [1:17:58<12:47, 2.02it/s]
|
589 |
85%|βββββββββ | 9129/10682 [1:17:58<12:46, 2.02it/s]
|
590 |
85%|βββββββββ | 9130/10682 [1:17:59<12:47, 2.02it/s]
|
591 |
85%|βββββββββ | 9131/10682 [1:17:59<12:45, 2.03it/s]
|
592 |
85%|βββββββββ | 9132/10682 [1:18:00<12:45, 2.02it/s]
|
593 |
85%|βββββββββ | 9133/10682 [1:18:00<12:44, 2.03it/s]
|
594 |
86%|βββββββββ | 9134/10682 [1:18:01<12:44, 2.03it/s]
|
595 |
86%|βββββββββ | 9135/10682 [1:18:01<12:43, 2.03it/s]
|
596 |
86%|βββββββββ | 9136/10682 [1:18:02<12:43, 2.03it/s]
|
597 |
86%|βββββββββ | 9137/10682 [1:18:02<12:42, 2.03it/s]
|
598 |
86%|βββββββββ | 9138/10682 [1:18:03<12:41, 2.03it/s]
|
599 |
86%|βββββββββ | 9139/10682 [1:18:03<12:41, 2.03it/s]
|
600 |
86%|βββββββββ | 9140/10682 [1:18:04<12:40, 2.03it/s]
|
601 |
86%|βββββββββ | 9141/10682 [1:18:04<12:39, 2.03it/s]
|
602 |
86%|βββββββββ | 9142/10682 [1:18:05<12:39, 2.03it/s]
|
603 |
86%|βββββββββ | 9143/10682 [1:18:05<12:39, 2.03it/s]
|
604 |
86%|βββββββββ | 9144/10682 [1:18:06<12:39, 2.03it/s]
|
605 |
86%|βββββββββ | 9145/10682 [1:18:06<12:38, 2.03it/s]
|
606 |
86%|βββββββββ | 9146/10682 [1:18:07<12:38, 2.03it/s]
|
607 |
86%|βββββββββ | 9147/10682 [1:18:07<12:36, 2.03it/s]
|
608 |
86%|βββββββββ | 9148/10682 [1:18:08<12:37, 2.03it/s]
|
609 |
86%|βββββββββ | 9149/10682 [1:18:08<12:36, 2.03it/s]
|
610 |
86%|βββββββββ | 9150/10682 [1:18:09<12:35, 2.03it/s]{'loss': 2.6889, 'grad_norm': 0.2705479860305786, 'learning_rate': 6.1368906655978e-05, 'epoch': 11.98}
|
611 |
|
612 |
|
613 |
86%|βββββββββ | 9150/10682 [1:18:09<12:35, 2.03it/s]
|
614 |
86%|βββββββββ | 9151/10682 [1:18:09<12:36, 2.02it/s]
|
615 |
86%|βββββββββ | 9152/10682 [1:18:10<12:35, 2.03it/s]
|
616 |
86%|βββββββββ | 9153/10682 [1:18:10<12:35, 2.02it/s]
|
617 |
86%|βββββββββ | 9154/10682 [1:18:11<12:34, 2.02it/s]
|
618 |
86%|βββββββββ | 9155/10682 [1:18:11<12:34, 2.02it/s]
|
619 |
86%|βββββββββ | 9156/10682 [1:18:12<12:33, 2.03it/s]
|
620 |
86%|βββββββββ | 9157/10682 [1:18:12<12:32, 2.03it/s]
|
621 |
86%|βββββββββ | 9158/10682 [1:18:13<12:32, 2.03it/s]
|
622 |
86%|βββββββββ | 9159/10682 [1:18:13<12:31, 2.03it/s]
|
623 |
86%|βββββββββ | 9160/10682 [1:18:14<12:30, 2.03it/s]
|
624 |
86%|βββββββββ | 9161/10682 [1:18:14<12:29, 2.03it/s]
|
625 |
86%|βββββββοΏ½οΏ½οΏ½β | 9162/10682 [1:18:15<12:22, 2.05it/s]
|
626 |
86%|βββββββββ | 9163/10682 [1:18:28<1:45:57, 4.19s/it]
|
627 |
86%|βββββββββ | 9164/10682 [1:18:28<1:17:55, 3.08s/it]
|
628 |
86%|βββββββββ | 9165/10682 [1:18:29<58:14, 2.30s/it]
|
629 |
86%|βββββββββ | 9166/10682 [1:18:29<44:33, 1.76s/it]
|
630 |
86%|βββββββββ | 9167/10682 [1:18:30<34:54, 1.38s/it]
|
631 |
86%|βββββββββ | 9168/10682 [1:18:30<28:08, 1.12s/it]
|
632 |
86%|βββββββββ | 9169/10682 [1:18:31<23:24, 1.08it/s]
|
633 |
86%|βββββββββ | 9170/10682 [1:18:31<20:06, 1.25it/s]
|
634 |
86%|βββββββββ | 9171/10682 [1:18:31<17:47, 1.42it/s]
|
635 |
86%|βββββββββ | 9172/10682 [1:18:32<16:15, 1.55it/s]
|
636 |
86%|βββββββββ | 9173/10682 [1:18:32<15:06, 1.66it/s]
|
637 |
86%|βββββββββ | 9174/10682 [1:18:33<14:17, 1.76it/s]
|
638 |
86%|βββββββββ | 9175/10682 [1:18:33<13:44, 1.83it/s]{'loss': 2.6468, 'grad_norm': 0.26857879757881165, 'learning_rate': 5.94226809108499e-05, 'epoch': 12.02}
|
639 |
-
|
640 |
|
641 |
86%|βββββββββ | 9175/10682 [1:18:33<13:44, 1.83it/s]
|
642 |
86%|βββββββββ | 9176/10682 [1:18:34<13:19, 1.88it/s]
|
643 |
86%|βββββββββ | 9177/10682 [1:18:34<13:02, 1.92it/s]
|
644 |
86%|βββββββββ | 9178/10682 [1:18:35<12:49, 1.96it/s]
|
645 |
86%|βββββββββ | 9179/10682 [1:18:35<12:40, 1.98it/s]
|
646 |
86%|βββββββββ | 9180/10682 [1:18:36<12:33, 1.99it/s]
|
647 |
86%|βββββββββ | 9181/10682 [1:18:36<12:28, 2.00it/s]
|
648 |
86%|βββββββββ | 9182/10682 [1:18:37<12:27, 2.01it/s]
|
649 |
86%|βββββββββ | 9183/10682 [1:18:37<12:24, 2.01it/s]
|
650 |
86%|βββββββββ | 9184/10682 [1:18:38<12:23, 2.01it/s]
|
651 |
86%|βββββββββ | 9185/10682 [1:18:38<12:21, 2.02it/s]
|
652 |
86%|βββββββββ | 9186/10682 [1:18:39<12:21, 2.02it/s]
|
653 |
86%|βββββββββ | 9187/10682 [1:18:39<12:19, 2.02it/s]
|
654 |
86%|βββββββββ | 9188/10682 [1:18:40<12:18, 2.02it/s]
|
655 |
86%|βββββββββ | 9189/10682 [1:18:40<12:18, 2.02it/s]
|
656 |
86%|βββββββββ | 9190/10682 [1:18:41<12:19, 2.02it/s]
|
|
|
657 |
|
658 |
86%|βββββββββ | 9175/10682 [1:18:33<13:44, 1.83it/s]
|
659 |
86%|βββββββββ | 9176/10682 [1:18:34<13:19, 1.88it/s]
|
660 |
86%|βββββββββ | 9177/10682 [1:18:34<13:02, 1.92it/s]
|
661 |
86%|βββββββββ | 9178/10682 [1:18:35<12:49, 1.96it/s]
|
662 |
86%|βββββββββ | 9179/10682 [1:18:35<12:40, 1.98it/s]
|
663 |
86%|βββββββββ | 9180/10682 [1:18:36<12:33, 1.99it/s]
|
664 |
86%|βββββββββ | 9181/10682 [1:18:36<12:28, 2.00it/s]
|
665 |
86%|βββββββββ | 9182/10682 [1:18:37<12:27, 2.01it/s]
|
666 |
86%|βββββββββ | 9183/10682 [1:18:37<12:24, 2.01it/s]
|
667 |
86%|βββββββββ | 9184/10682 [1:18:38<12:23, 2.01it/s]
|
668 |
86%|βββββββββ | 9185/10682 [1:18:38<12:21, 2.02it/s]
|
669 |
86%|βββββββββ | 9186/10682 [1:18:39<12:21, 2.02it/s]
|
670 |
86%|βββββββββ | 9187/10682 [1:18:39<12:19, 2.02it/s]
|
671 |
86%|βββββββββ | 9188/10682 [1:18:40<12:18, 2.02it/s]
|
672 |
86%|βββββββββ | 9189/10682 [1:18:40<12:18, 2.02it/s]
|
673 |
86%|βββββββββ | 9190/10682 [1:18:41<12:19, 2.02it/s]
|
674 |
86%|βββββββββ | 9191/10682 [1:18:41<12:20, 2.01it/s]
|
675 |
86%|βββββββββ | 9192/10682 [1:18:42<12:17, 2.02it/s]
|
676 |
86%|βββββββββ | 9193/10682 [1:18:42<12:15, 2.02it/s]
|
677 |
86%|βββββββββ | 9194/10682 [1:18:43<12:14, 2.03it/s]
|
678 |
86%|βββββββββ | 9195/10682 [1:18:43<12:12, 2.03it/s]
|
679 |
86%|βββββββββ | 9196/10682 [1:18:44<12:12, 2.03it/s]
|
680 |
86%|βββββββββ | 9197/10682 [1:18:44<12:11, 2.03it/s]
|
681 |
86%|βββββββββ | 9198/10682 [1:18:45<12:10, 2.03it/s]
|
682 |
86%|βββββββββ | 9199/10682 [1:18:45<12:10, 2.03it/s]
|
683 |
86%|βββββββββ | 9200/10682 [1:18:46<12:09, 2.03it/s]{'loss': 2.6277, 'grad_norm': 0.2667827308177948, 'learning_rate': 5.7505864256519716e-05, 'epoch': 12.05}
|
|
|
684 |
|
685 |
86%|βββββββββ | 9200/10682 [1:18:46<12:09, 2.03it/s]
|
686 |
86%|βββββββββ | 9201/10682 [1:18:46<12:09, 2.03it/s]
|
687 |
86%|βββββββββ | 9202/10682 [1:18:47<12:09, 2.03it/s]
|
688 |
86%|βββββββββ | 9203/10682 [1:18:47<12:08, 2.03it/s]
|
689 |
86%|βββββββββ | 9204/10682 [1:18:48<12:09, 2.03it/s]
|
690 |
86%|βββββββββ | 9205/10682 [1:18:48<12:07, 2.03it/s]
|
691 |
86%|βββββββββ | 9206/10682 [1:18:49<12:05, 2.03it/s]
|
692 |
86%|βββββββββ | 9207/10682 [1:18:49<12:05, 2.03it/s]
|
693 |
86%|βββββββββ | 9208/10682 [1:18:50<12:05, 2.03it/s]
|
694 |
86%|βββββββββ | 9209/10682 [1:18:50<12:04, 2.03it/s]
|
695 |
86%|βββββββββ | 9210/10682 [1:18:51<12:04, 2.03it/s]
|
696 |
86%|βββββββββ | 9211/10682 [1:18:51<12:03, 2.03it/s]
|
697 |
86%|βββββββββ | 9212/10682 [1:18:52<12:03, 2.03it/s]
|
698 |
86%|βββββββββ | 9213/10682 [1:18:52<12:03, 2.03it/s]
|
699 |
86%|βββββββββ | 9214/10682 [1:18:53<12:02, 2.03it/s]
|
700 |
86%|βββββββββ | 9215/10682 [1:18:53<12:02, 2.03it/s]
|
701 |
86%|βββββββββ | 9216/10682 [1:18:54<12:01, 2.03it/s]
|
702 |
86%|βββββββββ | 9217/10682 [1:18:54<12:01, 2.03it/s]
|
703 |
86%|βββββββββ | 9218/10682 [1:18:55<12:00, 2.03it/s]
|
704 |
86%|βββββββββ | 9219/10682 [1:18:55<12:00, 2.03it/s]
|
705 |
86%|βββββββββ | 9220/10682 [1:18:56<12:00, 2.03it/s]
|
706 |
86%|βββββββββ | 9221/10682 [1:18:56<12:00, 2.03it/s]
|
707 |
86%|βββββββββ | 9222/10682 [1:18:57<11:59, 2.03it/s]
|
708 |
86%|βββββββββ | 9223/10682 [1:18:57<11:58, 2.03it/s]
|
709 |
86%|βββββββββ | 9224/10682 [1:18:58<11:58, 2.03it/s]
|
710 |
86%|βββββββββ | 9225/10682 [1:18:58<11:58, 2.03it/s]{'loss': 2.6087, 'grad_norm': 0.26708680391311646, 'learning_rate': 5.561858464291258e-05, 'epoch': 12.08}
|
711 |
|
|
|
712 |
86%|βββββββββ | 9225/10682 [1:18:58<11:58, 2.03it/s]
|
713 |
86%|βββββββββ | 9226/10682 [1:18:59<11:58, 2.03it/s]
|
714 |
86%|βββββββββ | 9227/10682 [1:18:59<11:58, 2.02it/s]
|
715 |
86%|βββββββββ | 9228/10682 [1:19:00<11:57, 2.03it/s]
|
716 |
86%|βββββββββ | 9229/10682 [1:19:00<11:56, 2.03it/s]
|
717 |
86%|βββββββββ | 9230/10682 [1:19:01<11:55, 2.03it/s]
|
718 |
86%|βββββββββ | 9231/10682 [1:19:01<11:54, 2.03it/s]
|
719 |
86%|βββββββββ | 9232/10682 [1:19:02<11:54, 2.03it/s]
|
720 |
86%|βββββββββ | 9233/10682 [1:19:02<11:54, 2.03it/s]
|
721 |
86%|βββββββββ | 9234/10682 [1:19:03<11:53, 2.03it/s]
|
722 |
86%|βββββββββ | 9235/10682 [1:19:03<11:53, 2.03it/s]
|
723 |
86%|βββββββββ | 9236/10682 [1:19:04<11:52, 2.03it/s]
|
724 |
86%|βββββββββ | 9237/10682 [1:19:04<11:52, 2.03it/s]
|
725 |
86%|βββββββββ | 9238/10682 [1:19:05<11:51, 2.03it/s]
|
726 |
86%|βββββββββ | 9239/10682 [1:19:05<11:50, 2.03it/s]
|
727 |
87%|βββββββββ | 9240/10682 [1:19:06<11:50, 2.03it/s]
|
728 |
87%|βββββββββ | 9241/10682 [1:19:06<11:49, 2.03it/s]
|
729 |
87%|βββββββββ | 9242/10682 [1:19:07<11:49, 2.03it/s]
|
730 |
87%|βββββββββ | 9243/10682 [1:19:07<11:48, 2.03it/s]
|
731 |
87%|βββββββββ | 9244/10682 [1:19:07<11:47, 2.03it/s]
|
732 |
87%|βββββββββ | 9245/10682 [1:19:08<11:47, 2.03it/s]
|
733 |
87%|βββββββββ | 9246/10682 [1:19:08<11:47, 2.03it/s]
|
734 |
87%|βββββββββ | 9247/10682 [1:19:09<11:45, 2.03it/s]
|
735 |
87%|βββββββββ | 9248/10682 [1:19:09<11:45, 2.03it/s]
|
736 |
87%|βββββββββ | 9249/10682 [1:19:10<11:45, 2.03it/s]
|
737 |
87%|βββββββββ | 9250/10682 [1:19:10<11:44, 2.03it/s]{'loss': 2.6341, 'grad_norm': 0.2648920714855194, 'learning_rate': 5.3760968048319145e-05, 'epoch': 12.12}
|
|
|
738 |
|
739 |
87%|βββββββββ | 9250/10682 [1:19:10<11:44, 2.03it/s]
|
740 |
87%|βββββββββ | 9251/10682 [1:19:11<11:45, 2.03it/s]
|
741 |
87%|βββββββββ | 9252/10682 [1:19:11<11:44, 2.03it/s]
|
742 |
87%|βββββββββ | 9253/10682 [1:19:12<11:44, 2.03it/s]
|
743 |
87%|βββββββββ | 9254/10682 [1:19:12<11:43, 2.03it/s]
|
744 |
87%|βββββββββ | 9255/10682 [1:19:13<11:43, 2.03it/s]
|
745 |
87%|βββββββββ | 9256/10682 [1:19:13<11:43, 2.03it/s]
|
746 |
87%|βββββββββ | 9257/10682 [1:19:14<11:42, 2.03it/s]
|
747 |
87%|βββββββββ | 9258/10682 [1:19:14<11:41, 2.03it/s]
|
748 |
87%|βββββββββ | 9259/10682 [1:19:15<11:41, 2.03it/s]
|
749 |
87%|βββββββββ | 9260/10682 [1:19:15<11:40, 2.03it/s]
|
750 |
87%|βββββββββ | 9261/10682 [1:19:16<11:40, 2.03it/s]
|
751 |
87%|βββββββββ | 9262/10682 [1:19:16<11:39, 2.03it/s]
|
752 |
87%|βββββββββ | 9263/10682 [1:19:17<11:39, 2.03it/s]
|
753 |
87%|βββββββββ | 9264/10682 [1:19:17<11:39, 2.03it/s]
|
754 |
87%|βββββββββ | 9265/10682 [1:19:18<11:37, 2.03it/s]
|
755 |
87%|βββββββββ | 9266/10682 [1:19:18<11:37, 2.03it/s]
|
756 |
87%|βββββββββ | 9267/10682 [1:19:19<11:36, 2.03it/s]
|
757 |
87%|βββββββββ | 9268/10682 [1:19:19<11:36, 2.03it/s]
|
758 |
87%|βββββββββ | 9269/10682 [1:19:20<11:36, 2.03it/s]
|
759 |
87%|βββββββββ | 9270/10682 [1:19:20<11:35, 2.03it/s]
|
760 |
87%|βββββββββ | 9271/10682 [1:19:21<11:35, 2.03it/s]
|
761 |
87%|ββββββββοΏ½οΏ½οΏ½ | 9272/10682 [1:19:21<11:34, 2.03it/s]
|
762 |
87%|βββββββββ | 9273/10682 [1:19:22<11:34, 2.03it/s]
|
763 |
87%|βββββββββ | 9274/10682 [1:19:22<11:33, 2.03it/s]
|
764 |
87%|βββββββββ | 9275/10682 [1:19:23<11:33, 2.03it/s]{'loss': 2.6443, 'grad_norm': 0.268455445766449, 'learning_rate': 5.193313847098613e-05, 'epoch': 12.15}
|
|
|
765 |
|
766 |
87%|βββββββββ | 9275/10682 [1:19:23<11:33, 2.03it/s]
|
767 |
87%|βββββββββ | 9276/10682 [1:19:23<11:33, 2.03it/s]
|
768 |
87%|βββββββββ | 9277/10682 [1:19:24<11:32, 2.03it/s]
|
769 |
87%|βββββββββ | 9278/10682 [1:19:24<11:31, 2.03it/s]
|
770 |
87%|βββββββββ | 9279/10682 [1:19:25<11:31, 2.03it/s]
|
771 |
87%|βββββββββ | 9280/10682 [1:19:25<11:30, 2.03it/s]
|
772 |
87%|βββββββββ | 9281/10682 [1:19:26<11:30, 2.03it/s]
|
773 |
87%|βββββββββ | 9282/10682 [1:19:26<11:30, 2.03it/s]
|
774 |
87%|βββββββββ | 9283/10682 [1:19:27<11:28, 2.03it/s]
|
775 |
87%|βββββββββ | 9284/10682 [1:19:27<11:28, 2.03it/s]
|
776 |
87%|βββββββββ | 9285/10682 [1:19:28<11:28, 2.03it/s]
|
777 |
87%|βββββββββ | 9286/10682 [1:19:28<11:27, 2.03it/s]
|
778 |
87%|βββββββββ | 9287/10682 [1:19:29<11:27, 2.03it/s]
|
779 |
87%|βββββββββ | 9288/10682 [1:19:29<11:27, 2.03it/s]
|
780 |
87%|βββββββββ | 9289/10682 [1:19:30<11:26, 2.03it/s]
|
781 |
87%|βββββββββ | 9290/10682 [1:19:30<11:26, 2.03it/s]
|
782 |
87%|βββββββββ | 9291/10682 [1:19:31<11:25, 2.03it/s]
|
783 |
87%|βββββββββ | 9292/10682 [1:19:31<11:25, 2.03it/s]
|
784 |
87%|βββββββββ | 9293/10682 [1:19:32<11:25, 2.03it/s]
|
785 |
87%|βββββββββ | 9294/10682 [1:19:32<11:24, 2.03it/s]
|
786 |
87%|βββββββββ | 9295/10682 [1:19:33<11:24, 2.03it/s]
|
787 |
87%|βββββββββ | 9296/10682 [1:19:33<11:24, 2.03it/s]
|
788 |
87%|βββββββββ | 9297/10682 [1:19:34<11:23, 2.03it/s]
|
789 |
87%|βββββββββ | 9298/10682 [1:19:34<11:23, 2.03it/s]
|
790 |
87%|βββββββββ | 9299/10682 [1:19:35<11:21, 2.03it/s]
|
791 |
87%|βββββββββ | 9300/10682 [1:19:35<11:21, 2.03it/s]{'loss': 2.6238, 'grad_norm': 0.2692926526069641, 'learning_rate': 5.0135217920839137e-05, 'epoch': 12.18}
|
|
|
792 |
|
793 |
87%|βββββββββ | 9300/10682 [1:19:35<11:21, 2.03it/s]
|
794 |
87%|βββββββββ | 9301/10682 [1:19:36<11:21, 2.03it/s]
|
795 |
87%|βββββββββ | 9302/10682 [1:19:36<11:21, 2.03it/s]
|
796 |
87%|βββββββββ | 9303/10682 [1:19:37<11:20, 2.03it/s]
|
797 |
87%|βββββββββ | 9304/10682 [1:19:37<11:19, 2.03it/s]
|
798 |
87%|βββββββββ | 9305/10682 [1:19:38<11:19, 2.03it/s]
|
799 |
87%|βββββββββ | 9306/10682 [1:19:38<11:18, 2.03it/s]
|
800 |
87%|βββββββββ | 9307/10682 [1:19:39<11:18, 2.03it/s]
|
801 |
87%|βββββββββ | 9308/10682 [1:19:39<11:17, 2.03it/s]
|
802 |
87%|βββββββββ | 9309/10682 [1:19:40<11:17, 2.03it/s]
|
803 |
87%|βββββββββ | 9310/10682 [1:19:40<11:16, 2.03it/s]
|
804 |
87%|βββββββββ | 9311/10682 [1:19:41<11:15, 2.03it/s]
|
805 |
87%|βββββββββ | 9312/10682 [1:19:41<11:15, 2.03it/s]
|
806 |
87%|βββββββββ | 9313/10682 [1:19:42<11:14, 2.03it/s]
|
807 |
87%|βββββββββ | 9314/10682 [1:19:42<11:14, 2.03it/s]
|
808 |
87%|βββββββββ | 9315/10682 [1:19:42<11:13, 2.03it/s]
|
809 |
87%|βββββββββ | 9316/10682 [1:19:43<11:14, 2.03it/s]
|
810 |
87%|βββββββββ | 9317/10682 [1:19:43<11:13, 2.03it/s]
|
811 |
87%|βββββββββ | 9318/10682 [1:19:44<11:12, 2.03it/s]
|
812 |
87%|βββββββββ | 9319/10682 [1:19:44<11:12, 2.03it/s]
|
813 |
87%|βββββββββ | 9320/10682 [1:19:45<11:11, 2.03it/s]
|
814 |
87%|βββββββββ | 9321/10682 [1:19:45<11:11, 2.03it/s]
|
815 |
87%|βββββββββ | 9322/10682 [1:19:46<11:10, 2.03it/s]
|
816 |
87%|βββββββββ | 9323/10682 [1:19:46<11:09, 2.03it/s]
|
817 |
87%|βββββββββ | 9324/10682 [1:19:47<11:09, 2.03it/s]
|
818 |
87%|βββββββββ | 9325/10682 [1:19:47<11:08, 2.03it/s]{'loss': 2.6351, 'grad_norm': 0.2662162184715271, 'learning_rate': 4.836732641133895e-05, 'epoch': 12.21}
|
|
|
819 |
|
820 |
87%|βββββββββ | 9325/10682 [1:19:47<11:08, 2.03it/s]
|
821 |
87%|βββββββββ | 9326/10682 [1:19:48<11:09, 2.03it/s]
|
822 |
87%|βββββββββ | 9327/10682 [1:19:48<11:08, 2.03it/s]
|
823 |
87%|βββββββββ | 9328/10682 [1:19:49<11:08, 2.02it/s]
|
824 |
87%|βββββββββ | 9329/10682 [1:19:49<11:07, 2.03it/s]
|
825 |
87%|βββββββββ | 9330/10682 [1:19:50<11:06, 2.03it/s]
|
826 |
87%|βββββββββ | 9331/10682 [1:19:50<11:06, 2.03it/s]
|
827 |
87%|βββββββββ | 9332/10682 [1:19:51<11:05, 2.03it/s]
|
828 |
87%|βββββββββ | 9333/10682 [1:19:51<11:05, 2.03it/s]
|
829 |
87%|βββββββββ | 9334/10682 [1:19:52<11:03, 2.03it/s]
|
830 |
87%|βββββββββ | 9335/10682 [1:19:52<11:04, 2.03it/s]
|
831 |
87%|βββββββββ | 9336/10682 [1:19:53<11:03, 2.03it/s]
|
832 |
87%|βββββββββ | 9337/10682 [1:19:53<11:03, 2.03it/s]
|
833 |
87%|βββββββββ | 9338/10682 [1:19:54<11:03, 2.03it/s]
|
834 |
87%|βββββββββ | 9339/10682 [1:19:54<11:02, 2.03it/s]
|
835 |
87%|βββββββββ | 9340/10682 [1:19:55<11:02, 2.03it/s]
|
836 |
87%|βββββββββ | 9341/10682 [1:19:55<11:02, 2.03it/s]
|
837 |
87%|βββββββββ | 9342/10682 [1:19:56<11:02, 2.02it/s]
|
838 |
87%|βββββββββ | 9343/10682 [1:19:56<11:00, 2.03it/s]
|
839 |
87%|βββββββββ | 9344/10682 [1:19:57<11:00, 2.03it/s]
|
840 |
87%|βββββββββ | 9345/10682 [1:19:57<10:59, 2.03it/s]
|
841 |
87%|βββββββββ | 9346/10682 [1:19:58<10:59, 2.03it/s]
|
842 |
88%|βββββββββ | 9347/10682 [1:19:58<10:58, 2.03it/s]
|
843 |
88%|βββββββββ | 9348/10682 [1:19:59<10:58, 2.03it/s]
|
844 |
88%|βββββββββ | 9349/10682 [1:19:59<10:57, 2.03it/s]
|
845 |
88%|βββββββββ | 9350/10682 [1:20:00<10:56, 2.03it/s]
|
846 |
|
|
|
847 |
88%|βββββββββ | 9350/10682 [1:20:00<10:56, 2.03it/s]
|
848 |
88%|βββββββββ | 9351/10682 [1:20:00<10:57, 2.02it/s]
|
849 |
88%|βββββββββ | 9352/10682 [1:20:01<10:56, 2.03it/s]
|
850 |
88%|βββββββββ | 9353/10682 [1:20:01<10:55, 2.03it/s]
|
851 |
88%|βββββββββ | 9354/10682 [1:20:02<10:54, 2.03it/s]
|
852 |
88%|βββββββββ | 9355/10682 [1:20:02<10:54, 2.03it/s]
|
853 |
88%|βββββββββ | 9356/10682 [1:20:03<10:54, 2.03it/s]
|
854 |
88%|βββββββββ | 9357/10682 [1:20:03<10:53, 2.03it/s]
|
855 |
88%|βββββββββ | 9358/10682 [1:20:04<10:53, 2.03it/s]
|
856 |
88%|βββββββββ | 9359/10682 [1:20:04<10:52, 2.03it/s]
|
857 |
88%|βββββββββ | 9360/10682 [1:20:05<10:52, 2.03it/s]
|
858 |
88%|βββββββββ | 9361/10682 [1:20:05<10:51, 2.03it/s]
|
859 |
88%|βββββββββ | 9362/10682 [1:20:06<10:51, 2.03it/s]
|
860 |
88%|βββββββββ | 9363/10682 [1:20:06<10:50, 2.03it/s]
|
861 |
88%|βββββββββ | 9364/10682 [1:20:07<10:50, 2.03it/s]
|
862 |
88%|βββββββββ | 9365/10682 [1:20:07<10:50, 2.03it/s]
|
863 |
88%|βββββββββ | 9366/10682 [1:20:08<10:49, 2.03it/s]
|
864 |
88%|βββββββββ | 9367/10682 [1:20:08<10:49, 2.02it/s]
|
865 |
88%|βββββββββ | 9368/10682 [1:20:09<10:49, 2.02it/s]
|
866 |
88%|βββββββββ | 9369/10682 [1:20:09<10:48, 2.02it/s]
|
867 |
88%|βββββββββ | 9370/10682 [1:20:10<10:48, 2.02it/s]
|
868 |
88%|βββββββββ | 9371/10682 [1:20:10<10:46, 2.03it/s]
|
869 |
88%|βββββββββ | 9372/10682 [1:20:11<10:46, 2.03it/s]
|
870 |
88%|βββββββββ | 9373/10682 [1:20:11<10:45, 2.03it/s]
|
871 |
88%|βββββββββ | 9374/10682 [1:20:12<10:45, 2.03it/s]
|
872 |
88%|βββββββββ | 9375/10682 [1:20:12<10:44, 2.03it/s]{'loss': 2.6218, 'grad_norm': 0.26617616415023804, 'learning_rate': 4.492210053786228e-05, 'epoch': 12.28}
|
|
|
873 |
|
874 |
88%|βββββββββ | 9375/10682 [1:20:12<10:44, 2.03it/s]
|
875 |
88%|βββββββββ | 9376/10682 [1:20:13<10:44, 2.03it/s]
|
876 |
88%|βββββββββ | 9377/10682 [1:20:13<10:44, 2.03it/s]
|
877 |
88%|βββββββββ | 9378/10682 [1:20:14<10:43, 2.03it/s]
|
878 |
88%|βββββββββ | 9379/10682 [1:20:14<10:42, 2.03it/s]
|
879 |
88%|βββββββββ | 9380/10682 [1:20:15<10:42, 2.03it/s]
|
880 |
88%|βββββββββ | 9381/10682 [1:20:15<10:42, 2.03it/s]
|
881 |
88%|βββββββββ | 9382/10682 [1:20:16<10:41, 2.03it/s]
|
882 |
88%|βββββββββ | 9383/10682 [1:20:16<10:40, 2.03it/s]
|
883 |
88%|βββββββββ | 9384/10682 [1:20:17<10:40, 2.03it/s]
|
884 |
88%|βββββββββ | 9385/10682 [1:20:17<10:39, 2.03it/s]
|
885 |
88%|βββββββββ | 9386/10682 [1:20:18<10:39, 2.03it/s]
|
886 |
88%|βββββββββ | 9387/10682 [1:20:18<10:38, 2.03it/s]
|
887 |
88%|βββββββββ | 9388/10682 [1:20:19<10:38, 2.03it/s]
|
888 |
88%|βββββββββ | 9389/10682 [1:20:19<10:37, 2.03it/s]
|
889 |
88%|βββββββββ | 9390/10682 [1:20:19<10:37, 2.03it/s]
|
890 |
88%|βββββββββ | 9391/10682 [1:20:20<10:36, 2.03it/s]
|
891 |
88%|βββββββββ | 9392/10682 [1:20:20<10:36, 2.03it/s]
|
892 |
88%|βββββββββ | 9393/10682 [1:20:21<10:36, 2.03it/s]
|
893 |
88%|βββββββββ | 9394/10682 [1:20:21<10:36, 2.02it/s]
|
894 |
88%|βββββββββ | 9395/10682 [1:20:22<10:35, 2.02it/s]
|
895 |
88%|βββββββββ | 9396/10682 [1:20:22<10:35, 2.02it/s]
|
896 |
88%|βββββββββ | 9397/10682 [1:20:23<10:34, 2.03it/s]
|
897 |
88%|βββββββββ | 9398/10682 [1:20:23<10:33, 2.03it/s]
|
898 |
88%|βββββββββ | 9399/10682 [1:20:24<10:32, 2.03it/s]
|
899 |
88%|βββββββββ | 9400/10682 [1:20:24<10:32, 2.03it/s]{'loss': 2.6316, 'grad_norm': 0.2690180540084839, 'learning_rate': 4.3244996147050855e-05, 'epoch': 12.31}
|
900 |
|
|
|
901 |
88%|βββββββββ | 9400/10682 [1:20:24<10:32, 2.03it/s]
|
902 |
88%|βββββββββ | 9401/10682 [1:20:25<10:32, 2.02it/s]
|
903 |
88%|βββββββββ | 9402/10682 [1:20:25<10:31, 2.03it/s]
|
904 |
88%|βββββββββ | 9403/10682 [1:20:26<10:31, 2.03it/s]
|
905 |
88%|βββββββββ | 9404/10682 [1:20:26<10:30, 2.03it/s]
|
906 |
88%|βββββββββ | 9405/10682 [1:20:27<10:29, 2.03it/s]
|
907 |
88%|βββββββββ | 9406/10682 [1:20:27<10:29, 2.03it/s]
|
908 |
88%|βββββββββ | 9407/10682 [1:20:28<10:28, 2.03it/s]
|
909 |
88%|βββββββββ | 9408/10682 [1:20:28<10:28, 2.03it/s]
|
910 |
88%|βββββββββ | 9409/10682 [1:20:29<10:27, 2.03it/s]
|
911 |
88%|βββββββββ | 9410/10682 [1:20:29<10:27, 2.03it/s]
|
912 |
88%|βββββββββ | 9411/10682 [1:20:30<10:27, 2.03it/s]
|
913 |
88%|βββββββββ | 9412/10682 [1:20:30<10:26, 2.03it/s]
|
914 |
88%|βββββββββ | 9413/10682 [1:20:31<10:25, 2.03it/s]
|
915 |
88%|βββββββββ | 9414/10682 [1:20:31<10:25, 2.03it/s]
|
916 |
88%|βββββββββ | 9415/10682 [1:20:32<10:24, 2.03it/s]
|
917 |
88%|βββββββββ | 9416/10682 [1:20:32<10:23, 2.03it/s]
|
918 |
88%|βββββββββ | 9417/10682 [1:20:33<10:23, 2.03it/s]
|
919 |
88%|βββββββββ | 9418/10682 [1:20:33<10:22, 2.03it/s]
|
920 |
88%|βββββββββ | 9419/10682 [1:20:34<10:22, 2.03it/s]
|
921 |
88%|βββββββββ | 9420/10682 [1:20:34<10:22, 2.03it/s]
|
922 |
88%|βββββββββ | 9421/10682 [1:20:35<10:21, 2.03it/s]
|
923 |
88%|βββββββββ | 9422/10682 [1:20:35<10:20, 2.03it/s]
|
924 |
88%|βββββββββ | 9423/10682 [1:20:36<10:20, 2.03it/s]
|
925 |
88%|βββββββββ | 9424/10682 [1:20:36<10:19, 2.03it/s]
|
926 |
88%|βββββββββ | 9425/10682 [1:20:37<10:18, 2.03it/s]{'loss': 2.629, 'grad_norm': 0.26739317178726196, 'learning_rate': 4.1598380727865315e-05, 'epoch': 12.34}
|
|
|
927 |
|
928 |
88%|βββββββββ | 9425/10682 [1:20:37<10:18, 2.03it/s]
|
929 |
88%|βββββββββ | 9426/10682 [1:20:37<10:19, 2.03it/s]
|
930 |
88%|βββββββββ | 9427/10682 [1:20:38<10:18, 2.03it/s]
|
931 |
88%|βββββββββ | 9428/10682 [1:20:38<10:18, 2.03it/s]
|
932 |
88%|βββββββββ | 9429/10682 [1:20:39<10:17, 2.03it/s]
|
933 |
88%|βββββββββ | 9430/10682 [1:20:39<10:16, 2.03it/s]
|
934 |
88%|βββββββββ | 9431/10682 [1:20:40<10:15, 2.03it/s]
|
935 |
88%|βββββββββ | 9432/10682 [1:20:40<10:15, 2.03it/s]
|
936 |
88%|βββββββββ | 9433/10682 [1:20:41<10:14, 2.03it/s]
|
937 |
88%|βββββββββ | 9434/10682 [1:20:41<10:14, 2.03it/s]
|
938 |
88%|βββββββββ | 9435/10682 [1:20:42<10:14, 2.03it/s]
|
939 |
88%|βββββββββ | 9436/10682 [1:20:42<10:13, 2.03it/s]
|
940 |
88%|βββββββββ | 9437/10682 [1:20:43<10:12, 2.03it/s]
|
941 |
88%|βββββββββ | 9438/10682 [1:20:43<10:12, 2.03it/s]
|
942 |
88%|βββββββββ | 9439/10682 [1:20:44<10:12, 2.03it/s]
|
943 |
88%|βββββββββ | 9440/10682 [1:20:44<10:12, 2.03it/s]
|
944 |
88%|βββββββββ | 9441/10682 [1:20:45<10:11, 2.03it/s]
|
945 |
88%|βββββββββ | 9442/10682 [1:20:45<10:10, 2.03it/s]
|
946 |
88%|βββββββββ | 9443/10682 [1:20:46<10:09, 2.03it/s]
|
947 |
88%|βββββββββ | 9444/10682 [1:20:46<10:09, 2.03it/s]
|
948 |
88%|βββββββββ | 9445/10682 [1:20:47<10:08, 2.03it/s]
|
949 |
88%|βββββββββ | 9446/10682 [1:20:47<10:08, 2.03it/s]
|
950 |
88%|βββββββββ | 9447/10682 [1:20:48<10:08, 2.03it/s]
|
951 |
88%|βββββββββ | 9448/10682 [1:20:48<10:07, 2.03it/s]
|
952 |
88%|βββββββββ | 9449/10682 [1:20:49<10:07, 2.03it/s]
|
953 |
88%|βββββββββ | 9450/10682 [1:20:49<10:06, 2.03it/s]
|
954 |
|
|
|
955 |
88%|βββββββββ | 9450/10682 [1:20:49<10:06, 2.03it/s]
|
956 |
88%|βββββββββ | 9451/10682 [1:20:50<10:07, 2.03it/s]
|
957 |
88%|βββββββββ | 9452/10682 [1:20:50<10:06, 2.03it/s]
|
958 |
88%|βββββββββ | 9453/10682 [1:20:51<10:05, 2.03it/s]
|
959 |
89%|βββββββββ | 9454/10682 [1:20:51<10:05, 2.03it/s]
|
960 |
89%|βββββββββ | 9455/10682 [1:20:52<10:04, 2.03it/s]
|
961 |
89%|βββββββββ | 9456/10682 [1:20:52<10:04, 2.03it/s]
|
962 |
89%|βββββββββ | 9457/10682 [1:20:53<10:03, 2.03it/s]
|
963 |
89%|βββββββββ | 9458/10682 [1:20:53<10:03, 2.03it/s]
|
964 |
89%|βββββββββ | 9459/10682 [1:20:53<10:02, 2.03it/s]
|
965 |
89%|βββββββββ | 9460/10682 [1:20:54<10:02, 2.03it/s]
|
966 |
89%|βββββββββ | 9461/10682 [1:20:54<10:01, 2.03it/s]
|
967 |
89%|βββββββββ | 9462/10682 [1:20:55<10:01, 2.03it/s]
|
968 |
89%|βββββββββ | 9463/10682 [1:20:55<10:00, 2.03it/s]
|
969 |
89%|βββββββββ | 9464/10682 [1:20:56<09:59, 2.03it/s]
|
970 |
89%|βββββββββ | 9465/10682 [1:20:56<09:59, 2.03it/s]
|
971 |
89%|βββββββββ | 9466/10682 [1:20:57<09:59, 2.03it/s]
|
972 |
89%|βββββββββ | 9467/10682 [1:20:57<09:58, 2.03it/s]
|
973 |
89%|βββββββββ | 9468/10682 [1:20:58<09:58, 2.03it/s]
|
974 |
89%|βββββββββ | 9469/10682 [1:20:58<09:57, 2.03it/s]
|
975 |
89%|βββββββββ | 9470/10682 [1:20:59<09:57, 2.03it/s]
|
976 |
89%|βββββββββ | 9471/10682 [1:20:59<09:56, 2.03it/s]
|
977 |
89%|βββββββββ | 9472/10682 [1:21:00<09:55, 2.03it/s]
|
978 |
89%|βββββββββ | 9473/10682 [1:21:00<09:55, 2.03it/s]
|
979 |
89%|βββββββββ | 9474/10682 [1:21:01<09:54, 2.03it/s]
|
980 |
89%|βββββββββ | 9475/10682 [1:21:01<09:54, 2.03it/s]{'loss': 2.6416, 'grad_norm': 0.26726436614990234, 'learning_rate': 3.839705441646779e-05, 'epoch': 12.41}
|
|
|
981 |
|
982 |
89%|βββββββββ | 9475/10682 [1:21:01<09:54, 2.03it/s]
|
983 |
89%|βββββββββ | 9476/10682 [1:21:02<09:54, 2.03it/s]
|
984 |
89%|βββββββββ | 9477/10682 [1:21:02<09:53, 2.03it/s]
|
985 |
89%|βββββββββ | 9478/10682 [1:21:03<09:53, 2.03it/s]
|
986 |
89%|βββββββββ | 9479/10682 [1:21:03<09:52, 2.03it/s]
|
987 |
89%|βββββββββ | 9480/10682 [1:21:04<09:51, 2.03it/s]
|
988 |
89%|βββββββββ | 9481/10682 [1:21:04<09:51, 2.03it/s]
|
989 |
89%|βββββββββ | 9482/10682 [1:21:05<09:51, 2.03it/s]
|
990 |
89%|βββββββββ | 9483/10682 [1:21:05<09:50, 2.03it/s]
|
991 |
89%|βββββββββ | 9484/10682 [1:21:06<09:49, 2.03it/s]
|
992 |
89%|βββββββββ | 9485/10682 [1:21:06<09:50, 2.03it/s]
|
993 |
89%|βββββββββ | 9486/10682 [1:21:07<09:49, 2.03it/s]
|
994 |
89%|βββββββββ | 9487/10682 [1:21:07<09:49, 2.03it/s]
|
995 |
89%|βββββββββ | 9488/10682 [1:21:08<09:48, 2.03it/s]
|
996 |
89%|βββββββββ | 9489/10682 [1:21:08<09:47, 2.03it/s]
|
997 |
89%|βββββββββ | 9490/10682 [1:21:09<09:47, 2.03it/s]
|
998 |
89%|βββββββββ | 9491/10682 [1:21:09<09:46, 2.03it/s]
|
999 |
89%|βββββββββ | 9492/10682 [1:21:10<09:45, 2.03it/s]
|
1000 |
89%|βββββββββ | 9493/10682 [1:21:10<09:45, 2.03it/s]
|
1001 |
89%|βββββββββ | 9494/10682 [1:21:11<09:44, 2.03it/s]
|
1002 |
89%|βββββββββ | 9495/10682 [1:21:11<09:45, 2.03it/s]
|
1003 |
89%|βββββββββ | 9496/10682 [1:21:12<09:44, 2.03it/s]
|
1004 |
89%|βββββββββ | 9497/10682 [1:21:12<09:43, 2.03it/s]
|
1005 |
89%|βββββββββ | 9498/10682 [1:21:13<09:43, 2.03it/s]
|
1006 |
89%|βββββββββ | 9499/10682 [1:21:13<09:41, 2.03it/s]
|
1007 |
89%|βββββββββ | 9500/10682 [1:21:14<09:41, 2.03it/s]
|
1008 |
|
|
|
1009 |
89%|βββββββββ | 9500/10682 [1:21:14<09:41, 2.03it/s]
|
1010 |
89%|βββββββββ | 9501/10682 [1:21:14<09:41, 2.03it/s]
|
1011 |
89%|βββββββββ | 9502/10682 [1:21:15<09:40, 2.03it/s]
|
1012 |
89%|βββββββββ | 9503/10682 [1:21:15<09:40, 2.03it/s]
|
1013 |
89%|βββββββββ | 9504/10682 [1:21:16<09:40, 2.03it/s]
|
1014 |
89%|βββββββββ | 9505/10682 [1:21:16<09:40, 2.03it/s]
|
1015 |
89%|βββββββββ | 9506/10682 [1:21:17<09:39, 2.03it/s]
|
1016 |
89%|βββββββββ | 9507/10682 [1:21:17<09:39, 2.03it/s]
|
1017 |
89%|βββββββββ | 9508/10682 [1:21:18<09:39, 2.03it/s]
|
1018 |
89%|βββββββββ | 9509/10682 [1:21:18<09:38, 2.03it/s]
|
1019 |
89%|βββββββββ | 9510/10682 [1:21:19<09:37, 2.03it/s]
|
1020 |
89%|βββββββββ | 9511/10682 [1:21:19<09:36, 2.03it/s]
|
1021 |
89%|βββββββββ | 9512/10682 [1:21:20<09:36, 2.03it/s]
|
1022 |
89%|βββββββββ | 9513/10682 [1:21:20<09:35, 2.03it/s]
|
1023 |
89%|βββββββββ | 9514/10682 [1:21:21<09:35, 2.03it/s]
|
1024 |
89%|βββββββββ | 9515/10682 [1:21:21<09:34, 2.03it/s]
|
1025 |
89%|βββββββββ | 9516/10682 [1:21:22<09:34, 2.03it/s]
|
1026 |
89%|βββββββββ | 9517/10682 [1:21:22<09:34, 2.03it/s]
|
1027 |
89%|βββββββββ | 9518/10682 [1:21:23<10:24, 1.86it/s]
|
1028 |
89%|βββββββββ | 9519/10682 [1:21:23<10:08, 1.91it/s]
|
1029 |
89%|βββββββββ | 9520/10682 [1:21:24<09:57, 1.94it/s]
|
1030 |
89%|βββββββββ | 9521/10682 [1:21:24<09:49, 1.97it/s]
|
1031 |
89%|βββββββββ | 9522/10682 [1:21:25<09:43, 1.99it/s]
|
1032 |
89%|βββββββββ | 9523/10682 [1:21:25<09:40, 2.00it/s]
|
1033 |
89%|βββββββββ | 9524/10682 [1:21:26<09:36, 2.01it/s]
|
1034 |
89%|βββββββββ | 9525/10682 [1:21:26<09:34, 2.01it/s]{'loss': 2.6339, 'grad_norm': 0.2671761214733124, 'learning_rate': 3.531897635965431e-05, 'epoch': 12.48}
|
|
|
1035 |
|
1036 |
89%|βββββββββ | 9525/10682 [1:21:26<09:34, 2.01it/s]
|
1037 |
89%|βββββββββ | 9526/10682 [1:21:27<09:33, 2.02it/s]
|
1038 |
89%|βββββββββ | 9527/10682 [1:21:27<09:32, 2.02it/s]
|
1039 |
89%|βββββββββ | 9528/10682 [1:21:28<09:30, 2.02it/s]
|
1040 |
89%|βββββββββ | 9529/10682 [1:21:28<09:29, 2.02it/s]
|
1041 |
89%|βββββββββ | 9530/10682 [1:21:29<09:29, 2.02it/s]
|
1042 |
89%|βββββββββ | 9531/10682 [1:21:29<09:27, 2.03it/s]
|
1043 |
89%|βββββββββ | 9532/10682 [1:21:30<09:28, 2.02it/s]
|
1044 |
89%|βββββββββ | 9533/10682 [1:21:30<09:26, 2.03it/s]
|
1045 |
89%|βββββββββ | 9534/10682 [1:21:31<09:26, 2.03it/s]
|
1046 |
89%|βββββββββ | 9535/10682 [1:21:31<09:25, 2.03it/s]
|
1047 |
89%|βββββββββ | 9536/10682 [1:21:32<09:25, 2.03it/s]
|
1048 |
89%|βββββββββ | 9537/10682 [1:21:32<09:24, 2.03it/s]
|
1049 |
89%|βββββββββ | 9538/10682 [1:21:33<09:23, 2.03it/s]
|
1050 |
89%|βββββββββ | 9539/10682 [1:21:33<09:23, 2.03it/s]
|
1051 |
89%|βββββββββ | 9540/10682 [1:21:34<09:23, 2.03it/s]
|
1052 |
89%|βββββββββ | 9541/10682 [1:21:34<09:23, 2.03it/s]
|
1053 |
89%|βββββββββ | 9542/10682 [1:21:35<09:22, 2.03it/s]
|
1054 |
89%|βββββββββ | 9543/10682 [1:21:35<09:21, 2.03it/s]
|
1055 |
89%|βββββββββ | 9544/10682 [1:21:36<09:20, 2.03it/s]
|
1056 |
89%|βββββββββ | 9545/10682 [1:21:36<09:20, 2.03it/s]
|
1057 |
89%|βββββββββ | 9546/10682 [1:21:37<09:20, 2.03it/s]
|
1058 |
89%|βββββββββ | 9547/10682 [1:21:37<09:19, 2.03it/s]
|
1059 |
89%|βββββββββ | 9548/10682 [1:21:38<09:19, 2.03it/s]
|
1060 |
89%|βββββββββ | 9549/10682 [1:21:38<09:18, 2.03it/s]
|
1061 |
89%|βββββββββ | 9550/10682 [1:21:38<09:18, 2.03it/s]{'loss': 2.6398, 'grad_norm': 0.2684043049812317, 'learning_rate': 3.382641354591731e-05, 'epoch': 12.51}
|
|
|
1062 |
|
1063 |
89%|βββββββββ | 9550/10682 [1:21:38<09:18, 2.03it/s]
|
1064 |
89%|βββββββββ | 9551/10682 [1:21:39<09:18, 2.03it/s]
|
1065 |
89%|βββββββββ | 9552/10682 [1:21:39<09:17, 2.03it/s]
|
1066 |
89%|βββββββββ | 9553/10682 [1:21:40<09:17, 2.02it/s]
|
1067 |
89%|βββββββββ | 9554/10682 [1:21:40<09:16, 2.03it/s]
|
1068 |
89%|βββββββββ | 9555/10682 [1:21:41<09:15, 2.03it/s]
|
1069 |
89%|βββββββββ | 9556/10682 [1:21:41<09:15, 2.03it/s]
|
1070 |
89%|βββββββββ | 9557/10682 [1:21:42<09:14, 2.03it/s]
|
1071 |
89%|βββββββββ | 9558/10682 [1:21:42<09:13, 2.03it/s]
|
1072 |
89%|βββββββββ | 9559/10682 [1:21:43<09:14, 2.03it/s]
|
1073 |
89%|βββββββββ | 9560/10682 [1:21:43<09:13, 2.03it/s]
|
1074 |
90%|βββββββββ | 9561/10682 [1:21:44<09:12, 2.03it/s]
|
1075 |
90%|βββββββββ | 9562/10682 [1:21:44<09:12, 2.03it/s]
|
1076 |
90%|βββββββββ | 9563/10682 [1:21:45<09:11, 2.03it/s]
|
1077 |
90%|βββββββββ | 9564/10682 [1:21:45<09:11, 2.03it/s]
|
1078 |
90%|βββββββββ | 9565/10682 [1:21:46<09:11, 2.03it/s]
|
1079 |
90%|βββββββββ | 9566/10682 [1:21:46<09:11, 2.02it/s]
|
1080 |
90%|βββββββββ | 9567/10682 [1:21:47<09:10, 2.02it/s]
|
1081 |
90%|βββββββββ | 9568/10682 [1:21:47<09:10, 2.02it/s]
|
1082 |
90%|βββββββββ | 9569/10682 [1:21:48<09:09, 2.02it/s]
|
1083 |
90%|βββββββββ | 9570/10682 [1:21:48<09:09, 2.03it/s]
|
1084 |
90%|βββββββββ | 9571/10682 [1:21:49<09:08, 2.02it/s]
|
1085 |
90%|βββββββββ | 9572/10682 [1:21:49<09:08, 2.03it/s]
|
1086 |
90%|βββββββββ | 9573/10682 [1:21:50<09:07, 2.03it/s]
|
1087 |
90%|βββββββββ | 9574/10682 [1:21:50<09:06, 2.03it/s]
|
1088 |
90%|βββββββββ | 9575/10682 [1:21:51<09:06, 2.03it/s]{'loss': 2.6295, 'grad_norm': 0.2759605348110199, 'learning_rate': 3.2364968406054075e-05, 'epoch': 12.54}
|
|
|
1089 |
|
1090 |
90%|βββββββββ | 9575/10682 [1:21:51<09:06, 2.03it/s]
|
1091 |
90%|βββββββββ | 9576/10682 [1:21:51<09:06, 2.02it/s]
|
1092 |
90%|βββββββββ | 9577/10682 [1:21:52<09:05, 2.02it/s]
|
1093 |
90%|βββββββββ | 9578/10682 [1:21:52<09:05, 2.02it/s]
|
1094 |
90%|βββββββββ | 9579/10682 [1:21:53<09:04, 2.03it/s]
|
1095 |
90%|βββββββββ | 9580/10682 [1:21:53<09:03, 2.03it/s]
|
1096 |
90%|βββββββββ | 9581/10682 [1:21:54<09:02, 2.03it/s]
|
1097 |
90%|βββββββββ | 9582/10682 [1:21:54<09:02, 2.03it/s]
|
1098 |
90%|βββββββββ | 9583/10682 [1:21:55<09:02, 2.03it/s]
|
1099 |
90%|βββββββββ | 9584/10682 [1:21:55<09:01, 2.03it/s]
|
1100 |
90%|βββββββββ | 9585/10682 [1:21:56<09:01, 2.03it/s]
|
1101 |
90%|βββββββββ | 9586/10682 [1:21:56<09:00, 2.03it/s]
|
1102 |
90%|βββββββββ | 9587/10682 [1:21:57<09:00, 2.03it/s]
|
1103 |
90%|βββββββββ | 9588/10682 [1:21:57<08:59, 2.03it/s]
|
1104 |
90%|βββββββββ | 9589/10682 [1:21:58<08:59, 2.03it/s]
|
1105 |
90%|βββββββββ | 9590/10682 [1:21:58<08:58, 2.03it/s]
|
1106 |
90%|βββββββββ | 9591/10682 [1:21:59<08:57, 2.03it/s]
|
1107 |
90%|βββββββββ | 9592/10682 [1:21:59<08:57, 2.03it/s]
|
1108 |
90%|βββββββββ | 9593/10682 [1:22:00<08:56, 2.03it/s]
|
1109 |
90%|βββββββββ | 9594/10682 [1:22:00<08:57, 2.03it/s]
|
1110 |
90%|βββββββββ | 9595/10682 [1:22:01<08:56, 2.03it/s]
|
1111 |
90%|βββββββββ | 9596/10682 [1:22:01<08:56, 2.03it/s]
|
1112 |
90%|βββββββββ | 9597/10682 [1:22:02<08:55, 2.03it/s]
|
1113 |
90%|βββββββββ | 9598/10682 [1:22:02<08:54, 2.03it/s]
|
1114 |
90%|βββββββββ | 9599/10682 [1:22:03<08:53, 2.03it/s]
|
1115 |
90%|βββββββββ | 9600/10682 [1:22:03<08:53, 2.03it/s]{'loss': 2.6293, 'grad_norm': 0.2649473547935486, 'learning_rate': 3.093473849336781e-05, 'epoch': 12.57}
|
|
|
1116 |
|
1117 |
90%|βββββββββ | 9600/10682 [1:22:03<08:53, 2.03it/s]
|
1118 |
90%|βββββββββ | 9601/10682 [1:22:04<08:54, 2.02it/s]
|
1119 |
90%|βββββββββ | 9602/10682 [1:22:04<08:53, 2.02it/s]
|
1120 |
90%|βββββββββ | 9603/10682 [1:22:05<08:52, 2.02it/s]
|
1121 |
90%|βββββββββ | 9604/10682 [1:22:05<08:51, 2.03it/s]
|
1122 |
90%|βββββββββ | 9605/10682 [1:22:06<08:51, 2.03it/s]
|
1123 |
90%|βββββββββ | 9606/10682 [1:22:06<08:50, 2.03it/s]
|
1124 |
90%|βββββββββ | 9607/10682 [1:22:07<08:50, 2.03it/s]
|
1125 |
90%|βββββββββ | 9608/10682 [1:22:07<08:49, 2.03it/s]
|
1126 |
90%|βββββββββ | 9609/10682 [1:22:08<08:49, 2.03it/s]
|
1127 |
90%|βββββββββ | 9610/10682 [1:22:08<08:49, 2.03it/s]
|
1128 |
90%|βββββββββ | 9611/10682 [1:22:09<09:33, 1.87it/s]
|
1129 |
90%|βββββββββ | 9612/10682 [1:22:09<09:19, 1.91it/s]
|
1130 |
90%|βββββββββ | 9613/10682 [1:22:10<09:09, 1.95it/s]
|
1131 |
90%|βββββββββ | 9614/10682 [1:22:10<09:01, 1.97it/s]
|
1132 |
90%|βββββββββ | 9615/10682 [1:22:11<08:56, 1.99it/s]
|
1133 |
90%|βββββββββ | 9616/10682 [1:22:11<08:52, 2.00it/s]
|
1134 |
90%|βββββββββ | 9617/10682 [1:22:12<08:50, 2.01it/s]
|
1135 |
90%|βββββββββ | 9618/10682 [1:22:12<08:48, 2.01it/s]
|
1136 |
90%|βββββββββ | 9619/10682 [1:22:13<08:46, 2.02it/s]
|
1137 |
90%|βββββββββ | 9620/10682 [1:22:13<08:45, 2.02it/s]
|
1138 |
90%|βββββββββ | 9621/10682 [1:22:14<08:44, 2.02it/s]
|
1139 |
90%|βββββββββ | 9622/10682 [1:22:14<08:44, 2.02it/s]
|
1140 |
90%|βββββββββ | 9623/10682 [1:22:15<08:43, 2.02it/s]
|
1141 |
90%|βββββββββ | 9624/10682 [1:22:15<08:42, 2.02it/s]
|
1142 |
90%|βββββββββ | 9625/10682 [1:22:16<08:41, 2.03it/s]{'loss': 2.6392, 'grad_norm': 0.26671603322029114, 'learning_rate': 2.9535819277506203e-05, 'epoch': 12.61}
|
|
|
1143 |
|
1144 |
90%|βββββββββ | 9625/10682 [1:22:16<08:41, 2.03it/s]
|
1145 |
90%|βββββββββ | 9626/10682 [1:22:16<08:41, 2.02it/s]
|
1146 |
90%|βββββββββ | 9627/10682 [1:22:17<08:40, 2.03it/s]
|
1147 |
90%|βββββββββ | 9628/10682 [1:22:17<08:40, 2.03it/s]
|
1148 |
90%|βββββββββ | 9629/10682 [1:22:18<08:39, 2.03it/s]
|
1149 |
90%|βββββββββ | 9630/10682 [1:22:18<08:39, 2.03it/s]
|
1150 |
90%|βββββββββ | 9631/10682 [1:22:19<08:38, 2.03it/s]
|
1151 |
90%|βββββββββ | 9632/10682 [1:22:19<08:38, 2.03it/s]
|
1152 |
90%|βββββββββ | 9633/10682 [1:22:20<08:37, 2.03it/s]
|
1153 |
90%|βββββββββ | 9634/10682 [1:22:20<08:37, 2.03it/s]
|
1154 |
90%|βββββββββ | 9635/10682 [1:22:21<08:36, 2.03it/s]
|
1155 |
90%|βββββββββ | 9636/10682 [1:22:21<08:35, 2.03it/s]
|
1156 |
90%|βββββββββ | 9637/10682 [1:22:22<08:35, 2.03it/s]
|
1157 |
90%|βββββββββ | 9638/10682 [1:22:22<08:35, 2.03it/s]
|
1158 |
90%|βββββββββ | 9639/10682 [1:22:23<08:34, 2.03it/s]
|
1159 |
90%|βββββββββ | 9640/10682 [1:22:23<08:34, 2.03it/s]
|
1160 |
90%|βββββββββ | 9641/10682 [1:22:24<08:33, 2.03it/s]
|
1161 |
90%|βββββββββ | 9642/10682 [1:22:24<08:32, 2.03it/s]
|
1162 |
90%|βββββββββ | 9643/10682 [1:22:25<08:32, 2.03it/s]
|
1163 |
90%|βββββββββ | 9644/10682 [1:22:25<08:31, 2.03it/s]
|
1164 |
90%|βββββββββ | 9645/10682 [1:22:26<08:31, 2.03it/s]
|
1165 |
90%|βββββββββ | 9646/10682 [1:22:26<08:30, 2.03it/s]
|
1166 |
90%|βββββββββ | 9647/10682 [1:22:26<08:29, 2.03it/s]
|
1167 |
90%|βββββββββ | 9648/10682 [1:22:27<08:29, 2.03it/s]
|
1168 |
90%|βββββββββ | 9649/10682 [1:22:27<08:28, 2.03it/s]
|
1169 |
90%|βββββββββ | 9650/10682 [1:22:28<08:27, 2.03it/s]{'loss': 2.6413, 'grad_norm': 0.27070415019989014, 'learning_rate': 2.8168304138088295e-05, 'epoch': 12.64}
|
1170 |
|
|
|
1171 |
90%|βββββββββ | 9650/10682 [1:22:28<08:27, 2.03it/s]
|
1172 |
90%|βββββββββ | 9651/10682 [1:22:28<08:28, 2.03it/s]
|
1173 |
90%|βββββββββ | 9652/10682 [1:22:29<08:27, 2.03it/s]
|
1174 |
90%|βββββββββ | 9653/10682 [1:22:29<08:27, 2.03it/s]
|
1175 |
90%|βββββββββ | 9654/10682 [1:22:30<08:26, 2.03it/s]
|
1176 |
90%|βββββββββ | 9655/10682 [1:22:30<08:26, 2.03it/s]
|
1177 |
90%|βββββββββ | 9656/10682 [1:22:31<08:25, 2.03it/s]
|
1178 |
90%|βββββββββ | 9657/10682 [1:22:31<08:25, 2.03it/s]
|
1179 |
90%|βββββββββ | 9658/10682 [1:22:32<08:25, 2.03it/s]
|
1180 |
90%|βββββββββ | 9659/10682 [1:22:32<08:24, 2.03it/s]
|
1181 |
90%|βββββββββ | 9660/10682 [1:22:33<08:24, 2.02it/s]
|
1182 |
90%|βββββββββ | 9661/10682 [1:22:33<08:24, 2.03it/s]
|
1183 |
90%|βββββββββ | 9662/10682 [1:22:34<08:24, 2.02it/s]
|
1184 |
90%|βββββββββ | 9663/10682 [1:22:34<08:23, 2.02it/s]
|
1185 |
90%|βββββββββ | 9664/10682 [1:22:35<08:23, 2.02it/s]
|
1186 |
90%|βββββββββ | 9665/10682 [1:22:35<08:23, 2.02it/s]
|
1187 |
90%|βββββββββ | 9666/10682 [1:22:36<08:22, 2.02it/s]
|
1188 |
90%|βββββββββ | 9667/10682 [1:22:36<08:21, 2.02it/s]
|
1189 |
91%|βββββββββ | 9668/10682 [1:22:37<08:21, 2.02it/s]
|
1190 |
91%|βββββββββ | 9669/10682 [1:22:37<08:20, 2.03it/s]
|
1191 |
91%|βββββββββ | 9670/10682 [1:22:38<08:19, 2.03it/s]
|
1192 |
91%|βββββββββ | 9671/10682 [1:22:38<08:19, 2.03it/s]
|
1193 |
91%|βββββββββ | 9672/10682 [1:22:39<08:18, 2.03it/s]
|
1194 |
91%|βββββββββ | 9673/10682 [1:22:39<08:17, 2.03it/s]
|
1195 |
91%|βββββββββ | 9674/10682 [1:22:40<08:16, 2.03it/s]
|
1196 |
91%|βββββββββ | 9675/10682 [1:22:40<08:16, 2.03it/s]{'loss': 2.6238, 'grad_norm': 0.26584649085998535, 'learning_rate': 2.6832284358471516e-05, 'epoch': 12.67}
|
|
|
1197 |
|
1198 |
91%|βββββββββ | 9675/10682 [1:22:40<08:16, 2.03it/s]
|
1199 |
91%|βββββββββ | 9676/10682 [1:22:41<08:16, 2.02it/s]
|
1200 |
91%|βββββββββ | 9677/10682 [1:22:41<08:16, 2.02it/s]
|
1201 |
91%|βββββββββ | 9678/10682 [1:22:42<08:15, 2.02it/s]
|
1202 |
91%|βββββββββ | 9679/10682 [1:22:42<08:14, 2.03it/s]
|
1203 |
91%|βββββββββ | 9680/10682 [1:22:43<08:14, 2.03it/s]
|
1204 |
91%|βββββββββ | 9681/10682 [1:22:43<08:14, 2.03it/s]
|
1205 |
91%|βββββββββ | 9682/10682 [1:22:44<08:13, 2.02it/s]
|
1206 |
91%|βββββββββ | 9683/10682 [1:22:44<08:13, 2.02it/s]
|
1207 |
91%|βββββββββ | 9684/10682 [1:22:45<08:13, 2.02it/s]
|
1208 |
91%|βββββββββ | 9685/10682 [1:22:45<08:12, 2.03it/s]
|
1209 |
91%|βββββββββ | 9686/10682 [1:22:46<08:11, 2.03it/s]
|
1210 |
91%|βββββββββ | 9687/10682 [1:22:46<08:10, 2.03it/s]
|
1211 |
91%|βββββββββ | 9688/10682 [1:22:47<08:10, 2.03it/s]
|
1212 |
91%|βββββββββ | 9689/10682 [1:22:47<08:10, 2.03it/s]
|
1213 |
91%|βββββββββ | 9690/10682 [1:22:48<08:08, 2.03it/s]
|
1214 |
91%|βββββββββ | 9691/10682 [1:22:48<08:08, 2.03it/s]
|
1215 |
91%|βββββββββ | 9692/10682 [1:22:49<08:08, 2.03it/s]
|
1216 |
91%|βββββββββ | 9693/10682 [1:22:49<08:07, 2.03it/s]
|
1217 |
91%|βββββββββ | 9694/10682 [1:22:50<08:08, 2.02it/s]
|
1218 |
91%|βββββββββ | 9695/10682 [1:22:50<08:07, 2.02it/s]
|
1219 |
91%|βββββββββ | 9696/10682 [1:22:51<08:06, 2.03it/s]
|
1220 |
91%|βββββββββ | 9697/10682 [1:22:51<08:06, 2.03it/s]
|
1221 |
91%|βββββββββ | 9698/10682 [1:22:52<08:04, 2.03it/s]
|
1222 |
91%|βββββββββ | 9699/10682 [1:22:52<08:04, 2.03it/s]
|
1223 |
91%|βββββββββ | 9700/10682 [1:22:53<08:04, 2.03it/s]
|
1224 |
{'loss': 2.6376, 'grad_norm': 0.266446590423584, 'learning_rate': 2.5527849119658387e-05, 'epoch': 12.7}
|
|
|
1225 |
91%|βββββββββ | 9700/10682 [1:22:53<08:04, 2.03it/s]
|
1226 |
91%|βββββββββ | 9701/10682 [1:22:53<08:05, 2.02it/s]
|
1227 |
91%|βββββββββ | 9702/10682 [1:22:54<08:04, 2.02it/s]
|
1228 |
91%|βββββββββ | 9703/10682 [1:22:54<08:03, 2.02it/s]
|
1229 |
91%|βββββββββ | 9704/10682 [1:22:55<08:02, 2.03it/s]
|
1230 |
91%|βββββββββ | 9705/10682 [1:22:55<08:02, 2.02it/s]
|
1231 |
91%|βββββββββ | 9706/10682 [1:22:56<08:02, 2.02it/s]
|
1232 |
91%|βββββββββ | 9707/10682 [1:22:56<08:02, 2.02it/s]
|
1233 |
91%|βββββββββ | 9708/10682 [1:22:57<08:00, 2.03it/s]
|
1234 |
91%|βββββββββ | 9709/10682 [1:22:57<08:00, 2.02it/s]
|
1235 |
91%|βββββββββ | 9710/10682 [1:22:58<07:59, 2.03it/s]
|
1236 |
91%|βββββββββ | 9711/10682 [1:22:58<07:58, 2.03it/s]
|
1237 |
91%|βββββββββ | 9712/10682 [1:22:59<07:58, 2.03it/s]
|
1238 |
91%|βββββββββ | 9713/10682 [1:22:59<07:57, 2.03it/s]
|
1239 |
91%|βββββββββ | 9714/10682 [1:23:00<07:57, 2.03it/s]
|
1240 |
91%|βββββββββ | 9715/10682 [1:23:00<07:56, 2.03it/s]
|
1241 |
91%|βββββββββ | 9716/10682 [1:23:01<07:55, 2.03it/s]
|
1242 |
91%|βββββββββ | 9717/10682 [1:23:01<07:54, 2.03it/s]
|
1243 |
91%|βββββββββ | 9718/10682 [1:23:02<07:54, 2.03it/s]
|
1244 |
91%|βββββββββ | 9719/10682 [1:23:02<07:54, 2.03it/s]
|
1245 |
91%|βββββββββ | 9720/10682 [1:23:03<07:54, 2.03it/s]
|
1246 |
91%|βββββββββ | 9721/10682 [1:23:03<07:53, 2.03it/s]
|
1247 |
91%|βββββββββ | 9722/10682 [1:23:03<07:53, 2.03it/s]
|
1248 |
91%|βββββββββ | 9723/10682 [1:23:04<07:53, 2.03it/s]
|
1249 |
91%|βββββββββ | 9724/10682 [1:23:04<07:52, 2.03it/s]
|
1250 |
91%|βββββββββ | 9725/10682 [1:23:05<07:52, 2.03it/s]
|
1251 |
{'loss': 2.6387, 'grad_norm': 0.2688989043235779, 'learning_rate': 2.4255085494343522e-05, 'epoch': 12.74}
|
|
|
1252 |
91%|βββββββββ | 9725/10682 [1:23:05<07:52, 2.03it/s]
|
1253 |
91%|βββββββββ | 9726/10682 [1:23:05<07:52, 2.02it/s]
|
1254 |
91%|βββββββββ | 9727/10682 [1:23:06<07:51, 2.03it/s]
|
1255 |
91%|βββββββββ | 9728/10682 [1:23:06<07:50, 2.03it/s]
|
1256 |
91%|βββββββββ | 9729/10682 [1:23:07<07:50, 2.03it/s]
|
1257 |
91%|βββββββββ | 9730/10682 [1:23:07<07:49, 2.03it/s]
|
1258 |
91%|βββββββββ | 9731/10682 [1:23:08<07:48, 2.03it/s]
|
1259 |
91%|βββββββββ | 9732/10682 [1:23:08<07:48, 2.03it/s]
|
1260 |
91%|βββββββββ | 9733/10682 [1:23:09<07:47, 2.03it/s]
|
1261 |
91%|βββββββββ | 9734/10682 [1:23:09<07:47, 2.03it/s]
|
1262 |
91%|βββββββββ | 9735/10682 [1:23:10<07:47, 2.03it/s]
|
1263 |
91%|βββββββββ | 9736/10682 [1:23:10<07:46, 2.03it/s]
|
1264 |
91%|βββββββββ | 9737/10682 [1:23:11<07:46, 2.02it/s]
|
1265 |
91%|βββββββββ | 9738/10682 [1:23:11<07:46, 2.02it/s]
|
1266 |
91%|βββββββββ | 9739/10682 [1:23:12<07:45, 2.02it/s]
|
1267 |
91%|βββββββββ | 9740/10682 [1:23:12<07:45, 2.03it/s]
|
1268 |
91%|βββββββββ | 9741/10682 [1:23:13<07:44, 2.02it/s]
|
1269 |
91%|βββββββββ | 9742/10682 [1:23:13<07:44, 2.03it/s]
|
1270 |
91%|βββββββββ | 9743/10682 [1:23:14<07:43, 2.02it/s]
|
1271 |
91%|βββββββββ | 9744/10682 [1:23:14<07:43, 2.03it/s]
|
1272 |
91%|βββββββββ | 9745/10682 [1:23:15<07:42, 2.02it/s]
|
1273 |
91%|βββββββββ | 9746/10682 [1:23:15<07:41, 2.03it/s]
|
1274 |
91%|βββββββββ | 9747/10682 [1:23:16<07:41, 2.03it/s]
|
1275 |
91%|ββββββββββ| 9748/10682 [1:23:16<07:40, 2.03it/s]
|
1276 |
91%|ββββββββββ| 9749/10682 [1:23:17<07:39, 2.03it/s]
|
1277 |
91%|ββββββββββ| 9750/10682 [1:23:17<07:39, 2.03it/s]{'loss': 2.6414, 'grad_norm': 0.269853413105011, 'learning_rate': 2.301407844110154e-05, 'epoch': 12.77}
|
|
|
1278 |
|
1279 |
91%|ββββββββββ| 9750/10682 [1:23:17<07:39, 2.03it/s]
|
1280 |
91%|ββββββββββ| 9751/10682 [1:23:18<07:39, 2.02it/s]
|
1281 |
91%|ββββββββββ| 9752/10682 [1:23:18<07:39, 2.03it/s]
|
1282 |
91%|ββββββββββ| 9753/10682 [1:23:19<07:38, 2.02it/s]
|
1283 |
91%|ββββββββββ| 9754/10682 [1:23:19<07:38, 2.03it/s]
|
1284 |
91%|ββββββββββ| 9755/10682 [1:23:20<07:37, 2.03it/s]
|
1285 |
91%|ββββββββββ| 9756/10682 [1:23:20<07:36, 2.03it/s]
|
1286 |
91%|ββββββββββ| 9757/10682 [1:23:21<07:36, 2.03it/s]
|
1287 |
91%|ββββββββββ| 9758/10682 [1:23:21<07:35, 2.03it/s]
|
1288 |
91%|ββββββββββ| 9759/10682 [1:23:22<07:35, 2.03it/s]
|
1289 |
91%|ββββββββββ| 9760/10682 [1:23:22<07:34, 2.03it/s]
|
1290 |
91%|ββββββββββ| 9761/10682 [1:23:23<07:33, 2.03it/s]
|
1291 |
91%|ββββββββββ| 9762/10682 [1:23:23<07:34, 2.02it/s]
|
1292 |
91%|ββββββββββ| 9763/10682 [1:23:24<07:34, 2.02it/s]
|
1293 |
91%|ββββββββββ| 9764/10682 [1:23:24<07:33, 2.02it/s]
|
1294 |
91%|ββββββββββ| 9765/10682 [1:23:25<07:33, 2.02it/s]
|
1295 |
91%|ββββββββββ| 9766/10682 [1:23:25<07:32, 2.02it/s]
|
1296 |
91%|ββββββββββ| 9767/10682 [1:23:26<07:32, 2.02it/s]
|
1297 |
91%|ββββββββββ| 9768/10682 [1:23:26<07:31, 2.02it/s]
|
1298 |
91%|ββββββββββ| 9769/10682 [1:23:27<07:30, 2.03it/s]
|
1299 |
91%|ββββββββββ| 9770/10682 [1:23:27<07:29, 2.03it/s]
|
1300 |
91%|ββββββββββ| 9771/10682 [1:23:28<07:29, 2.03it/s]
|
1301 |
91%|ββββββββββ| 9772/10682 [1:23:28<07:28, 2.03it/s]
|
1302 |
91%|ββββββββββ| 9773/10682 [1:23:29<07:27, 2.03it/s]
|
1303 |
91%|ββββββββββ| 9774/10682 [1:23:29<07:27, 2.03it/s]
|
1304 |
92%|ββββββββββ| 9775/10682 [1:23:30<07:27, 2.03it/s]
|
1305 |
{'loss': 2.639, 'grad_norm': 0.2653633952140808, 'learning_rate': 2.1804910798715826e-05, 'epoch': 12.8}
|
|
|
1306 |
92%|ββββββββββ| 9775/10682 [1:23:30<07:27, 2.03it/s]
|
1307 |
92%|ββββββββββ| 9776/10682 [1:23:30<07:27, 2.02it/s]
|
1308 |
92%|ββββββββββ| 9777/10682 [1:23:31<07:26, 2.03it/s]
|
1309 |
92%|ββββββββββ| 9778/10682 [1:23:31<07:26, 2.02it/s]
|
1310 |
92%|ββββββββββ| 9779/10682 [1:23:32<07:25, 2.02it/s]
|
1311 |
92%|ββββββββββ| 9780/10682 [1:23:32<07:25, 2.03it/s]
|
1312 |
92%|ββββββββββ| 9781/10682 [1:23:33<07:24, 2.03it/s]
|
1313 |
92%|ββββββββββ| 9782/10682 [1:23:33<07:24, 2.03it/s]
|
1314 |
92%|ββββββββββ| 9783/10682 [1:23:34<07:24, 2.02it/s]
|
1315 |
92%|ββββββββββ| 9784/10682 [1:23:34<07:23, 2.02it/s]
|
1316 |
92%|ββββββββββ| 9785/10682 [1:23:35<07:23, 2.02it/s]
|
1317 |
92%|ββββββββββ| 9786/10682 [1:23:35<07:22, 2.02it/s]
|
1318 |
92%|ββββββββββ| 9787/10682 [1:23:36<07:22, 2.02it/s]
|
1319 |
92%|ββββββββββ| 9788/10682 [1:23:36<07:21, 2.02it/s]
|
1320 |
92%|ββββββββββ| 9789/10682 [1:23:37<07:21, 2.02it/s]
|
1321 |
92%|ββββββββββ| 9790/10682 [1:23:37<07:20, 2.03it/s]
|
1322 |
92%|ββββββββββ| 9791/10682 [1:23:38<07:19, 2.03it/s]
|
1323 |
92%|ββββββββββ| 9792/10682 [1:23:38<07:19, 2.03it/s]
|
1324 |
92%|ββββββββββ| 9793/10682 [1:23:39<07:18, 2.03it/s]
|
1325 |
92%|ββββββββββ| 9794/10682 [1:23:39<07:18, 2.02it/s]
|
1326 |
92%|ββββββββββ| 9795/10682 [1:23:40<07:17, 2.03it/s]
|
1327 |
92%|ββββββββββ| 9796/10682 [1:23:40<07:18, 2.02it/s]
|
1328 |
92%|ββββββββββ| 9797/10682 [1:23:41<07:17, 2.02it/s]
|
1329 |
92%|ββββββββββ| 9798/10682 [1:23:41<07:17, 2.02it/s]
|
1330 |
92%|ββββββββββ| 9799/10682 [1:23:42<07:16, 2.02it/s]
|
1331 |
92%|ββββββββββ| 9800/10682 [1:23:42<07:15, 2.03it/s]{'loss': 2.6445, 'grad_norm': 0.2692641615867615, 'learning_rate': 2.0627663280649135e-05, 'epoch': 12.84}
|
|
|
1332 |
|
1333 |
92%|ββββββββββ| 9800/10682 [1:23:42<07:15, 2.03it/s]
|
1334 |
92%|ββββββββββ| 9801/10682 [1:23:42<07:15, 2.02it/s]
|
1335 |
92%|ββββββββββ| 9802/10682 [1:23:43<07:14, 2.02it/s]
|
1336 |
92%|ββββββββββ| 9803/10682 [1:23:43<07:14, 2.03it/s]
|
1337 |
92%|ββββββββββ| 9804/10682 [1:23:44<07:13, 2.03it/s]
|
1338 |
92%|ββββββββββ| 9805/10682 [1:23:44<07:13, 2.02it/s]
|
1339 |
92%|ββββββββββ| 9806/10682 [1:23:45<07:12, 2.03it/s]
|
1340 |
92%|ββββββββββ| 9807/10682 [1:23:45<07:11, 2.03it/s]
|
1341 |
92%|ββββββββββ| 9808/10682 [1:23:46<07:11, 2.03it/s]
|
1342 |
92%|ββββββββββ| 9809/10682 [1:23:46<07:10, 2.03it/s]
|
1343 |
92%|ββββββββββ| 9810/10682 [1:23:47<07:09, 2.03it/s]
|
1344 |
92%|ββββββββββ| 9811/10682 [1:23:47<07:09, 2.03it/s]
|
1345 |
92%|ββββββββββ| 9812/10682 [1:23:48<07:09, 2.03it/s]
|
1346 |
92%|ββββββββββ| 9813/10682 [1:23:48<07:08, 2.03it/s]
|
1347 |
92%|ββββββββββ| 9814/10682 [1:23:49<07:08, 2.02it/s]
|
1348 |
92%|ββββββββββ| 9815/10682 [1:23:49<07:07, 2.03it/s]
|
1349 |
92%|ββββββββββ| 9816/10682 [1:23:50<07:07, 2.03it/s]
|
1350 |
92%|ββββββββββ| 9817/10682 [1:23:50<07:06, 2.03it/s]
|
1351 |
92%|ββββββββββ| 9818/10682 [1:23:51<07:05, 2.03it/s]
|
1352 |
92%|ββββββββββ| 9819/10682 [1:23:51<07:05, 2.03it/s]
|
1353 |
92%|ββββββββββ| 9820/10682 [1:23:52<07:04, 2.03it/s]
|
1354 |
92%|ββββββββββ| 9821/10682 [1:23:52<07:04, 2.03it/s]
|
1355 |
92%|ββββββββββ| 9822/10682 [1:23:53<07:04, 2.03it/s]
|
1356 |
92%|ββββββββββ| 9823/10682 [1:23:53<07:03, 2.03it/s]
|
1357 |
92%|ββββββββββ| 9824/10682 [1:23:54<07:03, 2.03it/s]
|
1358 |
92%|ββββββββββ| 9825/10682 [1:23:54<07:02, 2.03it/s]{'loss': 2.6394, 'grad_norm': 0.26735222339630127, 'learning_rate': 1.9482414469655486e-05, 'epoch': 12.87}
|
|
|
1359 |
|
1360 |
92%|ββββββββββ| 9825/10682 [1:23:54<07:02, 2.03it/s]
|
1361 |
92%|ββββββββββ| 9826/10682 [1:23:55<07:03, 2.02it/s]
|
1362 |
92%|ββββββββββ| 9827/10682 [1:23:55<07:02, 2.02it/s]
|
1363 |
92%|ββββββββββ| 9828/10682 [1:23:56<07:02, 2.02it/s]
|
1364 |
92%|ββββββββββ| 9829/10682 [1:23:56<07:01, 2.02it/s]
|
1365 |
92%|ββββββββββ| 9830/10682 [1:23:57<07:00, 2.03it/s]
|
1366 |
92%|ββββββββββ| 9831/10682 [1:23:57<07:00, 2.02it/s]
|
1367 |
92%|ββββββββββ| 9832/10682 [1:23:58<06:59, 2.03it/s]
|
1368 |
92%|ββββββββββ| 9833/10682 [1:23:58<06:58, 2.03it/s]
|
1369 |
92%|ββββββββββ| 9834/10682 [1:23:59<06:58, 2.03it/s]
|
1370 |
92%|ββββββββββ| 9835/10682 [1:23:59<06:57, 2.03it/s]
|
1371 |
92%|ββββββββββ| 9836/10682 [1:24:00<06:57, 2.03it/s]
|
1372 |
92%|ββββββββββ| 9837/10682 [1:24:00<06:56, 2.03it/s]
|
1373 |
92%|ββββββββββ| 9838/10682 [1:24:01<06:56, 2.03it/s]
|
1374 |
92%|ββββββββββ| 9839/10682 [1:24:01<06:55, 2.03it/s]
|
1375 |
92%|ββββββββββ| 9840/10682 [1:24:02<06:55, 2.03it/s]
|
1376 |
92%|ββββββββββ| 9841/10682 [1:24:02<06:54, 2.03it/s]
|
1377 |
92%|ββββββββββ| 9842/10682 [1:24:03<06:54, 2.03it/s]
|
1378 |
92%|ββββββββββ| 9843/10682 [1:24:03<06:53, 2.03it/s]
|
1379 |
92%|ββββββββββ| 9844/10682 [1:24:04<06:53, 2.03it/s]
|
1380 |
92%|ββββββββββ| 9845/10682 [1:24:04<06:52, 2.03it/s]
|
1381 |
92%|ββββββββββ| 9846/10682 [1:24:05<06:51, 2.03it/s]
|
1382 |
92%|ββββββββββ| 9847/10682 [1:24:05<06:51, 2.03it/s]
|
1383 |
92%|ββββββββββ| 9848/10682 [1:24:06<06:51, 2.03it/s]
|
1384 |
92%|ββββββββββ| 9849/10682 [1:24:06<06:51, 2.03it/s]
|
1385 |
92%|ββββββββββ| 9850/10682 [1:24:07<06:50, 2.03it/s]
|
1386 |
|
|
|
1387 |
92%|ββββββββββ| 9850/10682 [1:24:07<06:50, 2.03it/s]
|
1388 |
92%|ββββββββββ| 9851/10682 [1:24:07<06:50, 2.02it/s]
|
1389 |
92%|ββββββββββ| 9852/10682 [1:24:08<06:49, 2.03it/s]
|
1390 |
92%|ββββββββββ| 9853/10682 [1:24:08<06:49, 2.02it/s]
|
1391 |
92%|ββββββββββ| 9854/10682 [1:24:09<06:48, 2.03it/s]
|
1392 |
92%|ββββββββββ| 9855/10682 [1:24:09<06:48, 2.03it/s]
|
1393 |
92%|ββββββββββ| 9856/10682 [1:24:10<06:47, 2.03it/s]
|
1394 |
92%|ββββββββββ| 9857/10682 [1:24:10<06:47, 2.03it/s]
|
1395 |
92%|ββββββββββ| 9858/10682 [1:24:11<06:46, 2.03it/s]
|
1396 |
92%|ββββββββββ| 9859/10682 [1:24:11<06:45, 2.03it/s]
|
1397 |
92%|ββββββββββ| 9860/10682 [1:24:12<06:46, 2.02it/s]
|
1398 |
92%|ββββββββββ| 9861/10682 [1:24:12<06:45, 2.03it/s]
|
1399 |
92%|ββββββββββ| 9862/10682 [1:24:13<06:44, 2.03it/s]
|
1400 |
92%|ββββββββββ| 9863/10682 [1:24:13<06:43, 2.03it/s]
|
1401 |
92%|ββββββββββ| 9864/10682 [1:24:14<06:43, 2.03it/s]
|
1402 |
92%|ββββββββββ| 9865/10682 [1:24:14<06:42, 2.03it/s]
|
1403 |
92%|ββββββββββ| 9866/10682 [1:24:15<06:42, 2.03it/s]
|
1404 |
92%|ββββββββββ| 9867/10682 [1:24:15<06:41, 2.03it/s]
|
1405 |
92%|ββββββββββ| 9868/10682 [1:24:16<06:41, 2.03it/s]
|
1406 |
92%|ββββββββββ| 9869/10682 [1:24:16<06:40, 2.03it/s]
|
1407 |
92%|ββββββββββ| 9870/10682 [1:24:17<06:40, 2.03it/s]
|
1408 |
92%|ββββββββββ| 9871/10682 [1:24:17<06:39, 2.03it/s]
|
1409 |
92%|ββββββββββ| 9872/10682 [1:24:18<06:39, 2.03it/s]
|
1410 |
92%|ββββββββββ| 9873/10682 [1:24:18<06:38, 2.03it/s]
|
1411 |
92%|ββββββββββ| 9874/10682 [1:24:19<06:38, 2.03it/s]
|
1412 |
92%|ββββββββββ| 9875/10682 [1:24:19<06:37, 2.03it/s]
|
1413 |
{'loss': 2.6349, 'grad_norm': 0.27050691843032837, 'learning_rate': 1.7288216615031272e-05, 'epoch': 12.93}
|
|
|
1414 |
92%|ββββββββββ| 9875/10682 [1:24:19<06:37, 2.03it/s]
|
1415 |
92%|ββββββββββ| 9876/10682 [1:24:19<06:37, 2.03it/s]
|
1416 |
92%|ββββββββββ| 9877/10682 [1:24:20<06:37, 2.03it/s]
|
1417 |
92%|ββββββββββ| 9878/10682 [1:24:20<06:36, 2.03it/s]
|
1418 |
92%|ββββββββββ| 9879/10682 [1:24:21<06:35, 2.03it/s]
|
1419 |
92%|ββββββββββ| 9880/10682 [1:24:21<06:35, 2.03it/s]
|
1420 |
93%|ββββββββββ| 9881/10682 [1:24:22<06:34, 2.03it/s]
|
1421 |
93%|ββββββββββ| 9882/10682 [1:24:22<06:34, 2.03it/s]
|
1422 |
93%|ββββββββββ| 9883/10682 [1:24:23<06:33, 2.03it/s]
|
1423 |
93%|ββββββββββ| 9884/10682 [1:24:23<06:34, 2.03it/s]
|
1424 |
93%|ββββββββββ| 9885/10682 [1:24:24<06:33, 2.03it/s]
|
1425 |
93%|ββββββββββ| 9886/10682 [1:24:24<06:32, 2.03it/s]
|
1426 |
93%|ββββββββββ| 9887/10682 [1:24:25<06:32, 2.03it/s]
|
1427 |
93%|ββββββββββ| 9888/10682 [1:24:25<06:30, 2.03it/s]
|
1428 |
93%|ββββββββββ| 9889/10682 [1:24:26<06:31, 2.03it/s]
|
1429 |
93%|ββββββββββ| 9890/10682 [1:24:26<06:30, 2.03it/s]
|
1430 |
93%|ββββββββββ| 9891/10682 [1:24:27<06:29, 2.03it/s]
|
1431 |
93%|ββββββββββ| 9892/10682 [1:24:27<06:29, 2.03it/s]
|
1432 |
93%|ββββββββββ| 9893/10682 [1:24:28<06:28, 2.03it/s]
|
1433 |
93%|ββββββββββ| 9894/10682 [1:24:28<06:28, 2.03it/s]
|
1434 |
93%|ββββββββββ| 9895/10682 [1:24:29<06:27, 2.03it/s]
|
1435 |
93%|ββββββββββ| 9896/10682 [1:24:29<06:27, 2.03it/s]
|
1436 |
93%|ββββββββββ| 9897/10682 [1:24:30<06:26, 2.03it/s]
|
1437 |
93%|ββββββββββ| 9898/10682 [1:24:30<06:26, 2.03it/s]
|
1438 |
93%|ββββββββββ| 9899/10682 [1:24:31<06:25, 2.03it/s]
|
1439 |
93%|ββββββββββ| 9900/10682 [1:24:31<06:25, 2.03it/s]{'loss': 2.6377, 'grad_norm': 0.2702299952507019, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
|
|
1440 |
|
1441 |
93%|ββββββββββ| 9900/10682 [1:24:31<06:25, 2.03it/s]
|
1442 |
93%|ββββββββββ| 9901/10682 [1:24:32<06:25, 2.03it/s]
|
1443 |
93%|ββββββββββ| 9902/10682 [1:24:32<06:24, 2.03it/s]
|
1444 |
93%|ββββββββββ| 9903/10682 [1:24:33<06:24, 2.03it/s]
|
1445 |
93%|ββββββββββ| 9904/10682 [1:24:33<06:23, 2.03it/s]
|
1446 |
93%|ββββββββββ| 9905/10682 [1:24:34<06:23, 2.03it/s]
|
1447 |
93%|ββββββββββ| 9906/10682 [1:24:34<06:22, 2.03it/s]
|
1448 |
93%|ββββββββββ| 9907/10682 [1:24:35<06:22, 2.03it/s]
|
1449 |
93%|ββββββββββ| 9908/10682 [1:24:35<06:22, 2.02it/s]
|
1450 |
93%|ββββββββββ| 9909/10682 [1:24:36<06:21, 2.03it/s]
|
1451 |
93%|ββββββββββ| 9910/10682 [1:24:36<06:21, 2.02it/s]
|
1452 |
93%|ββββββββββ| 9911/10682 [1:24:37<06:20, 2.03it/s]
|
1453 |
93%|ββββββββββ| 9912/10682 [1:24:37<06:19, 2.03it/s]
|
1454 |
93%|ββββββββββ| 9913/10682 [1:24:38<06:19, 2.03it/s]
|
1455 |
93%|ββββββββββ| 9914/10682 [1:24:38<06:18, 2.03it/s]
|
1456 |
93%|ββββββββββ| 9915/10682 [1:24:39<06:18, 2.03it/s]
|
1457 |
93%|ββββββββββ| 9916/10682 [1:24:39<06:17, 2.03it/s]
|
1458 |
93%|ββββββββββ| 9917/10682 [1:24:40<06:17, 2.03it/s]
|
1459 |
93%|ββββββββββ| 9918/10682 [1:24:40<06:16, 2.03it/s]
|
1460 |
93%|ββββββββββ| 9919/10682 [1:24:41<06:16, 2.02it/s]
|
1461 |
93%|ββββββββββ| 9920/10682 [1:24:41<06:16, 2.03it/s]
|
1462 |
93%|ββββββββββ| 9921/10682 [1:24:42<06:15, 2.03it/s]
|
1463 |
93%|ββββββββββ| 9922/10682 [1:24:42<06:15, 2.03it/s]
|
1464 |
93%|ββββββββββ| 9923/10682 [1:24:43<06:14, 2.02it/s]
|
1465 |
93%|ββββββββββ| 9924/10682 [1:24:43<06:14, 2.02it/s]
|
1466 |
93%|ββββββββββ| 9925/10682 [1:24:44<06:22, 1.98it/s]{'loss': 2.6435, 'grad_norm': 0.2638016641139984, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.0}
|
|
|
1467 |
|
1468 |
93%|ββββββββββ| 9925/10682 [1:24:44<06:22, 1.98it/s]
|
1469 |
93%|ββββββββββ| 9926/10682 [1:25:02<1:14:46, 5.93s/it]
|
1470 |
93%|ββββββββββ| 9927/10682 [1:25:03<54:08, 4.30s/it]
|
1471 |
93%|ββββββββββ| 9928/10682 [1:25:03<39:42, 3.16s/it]
|
1472 |
93%|ββββββββββ| 9929/10682 [1:25:04<29:39, 2.36s/it]
|
1473 |
93%|ββββββββββ| 9930/10682 [1:25:04<22:35, 1.80s/it]
|
1474 |
93%|ββββββββββ| 9931/10682 [1:25:05<17:38, 1.41s/it]
|
1475 |
93%|ββββββββββ| 9932/10682 [1:25:05<14:10, 1.13s/it]
|
1476 |
93%|ββββββββββ| 9933/10682 [1:25:06<11:45, 1.06it/s]
|
1477 |
93%|ββββββββββ| 9934/10682 [1:25:06<10:03, 1.24it/s]
|
1478 |
93%|ββββββββββ| 9935/10682 [1:25:07<08:55, 1.40it/s]
|
1479 |
93%|ββββββββββ| 9936/10682 [1:25:07<08:04, 1.54it/s]
|
1480 |
93%|ββββββββββ| 9937/10682 [1:25:08<07:29, 1.66it/s]
|
1481 |
93%|ββββββββββ| 9938/10682 [1:25:08<07:04, 1.75it/s]
|
1482 |
93%|ββββββββββ| 9939/10682 [1:25:09<06:47, 1.82it/s]
|
1483 |
93%|ββββββββββ| 9940/10682 [1:25:09<06:36, 1.87it/s]
|
1484 |
93%|ββββββββββ| 9941/10682 [1:25:10<06:26, 1.92it/s]
|
1485 |
93%|ββββββββββ| 9942/10682 [1:25:10<06:19, 1.95it/s]
|
1486 |
93%|ββββββββββ| 9943/10682 [1:25:11<06:14, 1.97it/s]
|
1487 |
93%|ββββββββββ| 9944/10682 [1:25:11<06:11, 1.99it/s]
|
1488 |
93%|ββββββββββ| 9945/10682 [1:25:12<06:09, 2.00it/s]
|
1489 |
93%|ββββββββββ| 9946/10682 [1:25:12<06:07, 2.00it/s]
|
1490 |
93%|ββββββββββ| 9947/10682 [1:25:13<06:05, 2.01it/s]
|
1491 |
93%|ββββββββββ| 9948/10682 [1:25:13<06:04, 2.01it/s]
|
1492 |
93%|ββββββββββ| 9949/10682 [1:25:14<06:04, 2.01it/s]
|
1493 |
93%|ββββββββββ| 9950/10682 [1:25:14<06:02, 2.02it/s]{'loss': 2.5985, 'grad_norm': 0.2674480974674225, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.03}
|
|
|
1494 |
|
1495 |
93%|ββββββββββ| 9950/10682 [1:25:14<06:02, 2.02it/s]
|
1496 |
93%|ββββββββββ| 9951/10682 [1:25:15<06:02, 2.02it/s]
|
|
|
555 |
|
556 |
|
557 |
85%|βββββββββ | 9100/10682 [1:17:44<12:59, 2.03it/s]
|
558 |
85%|βββββββββ | 9101/10682 [1:17:45<13:00, 2.02it/s]
|
559 |
85%|βββββββββ | 9102/10682 [1:17:45<12:59, 2.03it/s]
|
560 |
85%|βββββββββ | 9103/10682 [1:17:46<12:58, 2.03it/s]
|
561 |
85%|βββββββββ | 9104/10682 [1:17:46<12:59, 2.03it/s]
|
562 |
85%|βββββββββ | 9105/10682 [1:17:47<12:57, 2.03it/s]
|
563 |
85%|βββββββββ | 9106/10682 [1:17:47<12:57, 2.03it/s]
|
564 |
85%|βββββββββ | 9107/10682 [1:17:48<12:56, 2.03it/s]
|
565 |
85%|βββββββββ | 9108/10682 [1:17:48<12:56, 2.03it/s]
|
566 |
85%|βββββββββ | 9109/10682 [1:17:49<12:55, 2.03it/s]
|
567 |
85%|βββββββββ | 9110/10682 [1:17:49<12:54, 2.03it/s]
|
568 |
85%|βββββββββ | 9111/10682 [1:17:50<12:55, 2.03it/s]
|
569 |
85%|βββββββββ | 9112/10682 [1:17:50<12:53, 2.03it/s]
|
570 |
85%|βββββββββ | 9113/10682 [1:17:51<12:53, 2.03it/s]
|
571 |
85%|βββββββββ | 9114/10682 [1:17:51<12:52, 2.03it/s]
|
572 |
85%|βββββββββ | 9115/10682 [1:17:52<12:51, 2.03it/s]
|
573 |
85%|βββββββββ | 9116/10682 [1:17:52<12:52, 2.03it/s]
|
574 |
85%|βββββββββ | 9117/10682 [1:17:53<12:51, 2.03it/s]
|
575 |
85%|βββββββββ | 9118/10682 [1:17:53<12:51, 2.03it/s]
|
576 |
85%|βββββββββ | 9119/10682 [1:17:54<12:50, 2.03it/s]
|
577 |
85%|βββββββββ | 9120/10682 [1:17:54<12:51, 2.03it/s]
|
578 |
85%|βββββββββ | 9121/10682 [1:17:55<12:50, 2.03it/s]
|
579 |
85%|βββββββββ | 9122/10682 [1:17:55<12:50, 2.02it/s]
|
580 |
85%|βββββββββ | 9123/10682 [1:17:55<12:49, 2.02it/s]
|
581 |
85%|βββββββββ | 9124/10682 [1:17:56<12:49, 2.03it/s]
|
582 |
85%|βββββββββ | 9125/10682 [1:17:56<12:49, 2.02it/s]{'loss': 2.6917, 'grad_norm': 0.26624444127082825, 'learning_rate': 6.334441157888504e-05, 'epoch': 11.95}
|
583 |
|
584 |
|
585 |
85%|βββββββββ | 9125/10682 [1:17:56<12:49, 2.02it/s]
|
586 |
85%|βββββββββ | 9126/10682 [1:17:57<12:49, 2.02it/s]
|
587 |
85%|βββββββββ | 9127/10682 [1:17:57<12:48, 2.02it/s]
|
588 |
85%|βββββββββ | 9128/10682 [1:17:58<12:47, 2.02it/s]
|
589 |
85%|βββββββββ | 9129/10682 [1:17:58<12:46, 2.02it/s]
|
590 |
85%|βββββββββ | 9130/10682 [1:17:59<12:47, 2.02it/s]
|
591 |
85%|βββββββββ | 9131/10682 [1:17:59<12:45, 2.03it/s]
|
592 |
85%|βββββββββ | 9132/10682 [1:18:00<12:45, 2.02it/s]
|
593 |
85%|βββββββββ | 9133/10682 [1:18:00<12:44, 2.03it/s]
|
594 |
86%|βββββββββ | 9134/10682 [1:18:01<12:44, 2.03it/s]
|
595 |
86%|βββββββββ | 9135/10682 [1:18:01<12:43, 2.03it/s]
|
596 |
86%|βββββββββ | 9136/10682 [1:18:02<12:43, 2.03it/s]
|
597 |
86%|βββββββββ | 9137/10682 [1:18:02<12:42, 2.03it/s]
|
598 |
86%|βββββββββ | 9138/10682 [1:18:03<12:41, 2.03it/s]
|
599 |
86%|βββββββββ | 9139/10682 [1:18:03<12:41, 2.03it/s]
|
600 |
86%|βββββββββ | 9140/10682 [1:18:04<12:40, 2.03it/s]
|
601 |
86%|βββββββββ | 9141/10682 [1:18:04<12:39, 2.03it/s]
|
602 |
86%|βββββββββ | 9142/10682 [1:18:05<12:39, 2.03it/s]
|
603 |
86%|βββββββββ | 9143/10682 [1:18:05<12:39, 2.03it/s]
|
604 |
86%|βββββββββ | 9144/10682 [1:18:06<12:39, 2.03it/s]
|
605 |
86%|βββββββββ | 9145/10682 [1:18:06<12:38, 2.03it/s]
|
606 |
86%|βββββββββ | 9146/10682 [1:18:07<12:38, 2.03it/s]
|
607 |
86%|βββββββββ | 9147/10682 [1:18:07<12:36, 2.03it/s]
|
608 |
86%|βββββββββ | 9148/10682 [1:18:08<12:37, 2.03it/s]
|
609 |
86%|βββββββββ | 9149/10682 [1:18:08<12:36, 2.03it/s]
|
610 |
86%|βββββββββ | 9150/10682 [1:18:09<12:35, 2.03it/s]{'loss': 2.6889, 'grad_norm': 0.2705479860305786, 'learning_rate': 6.1368906655978e-05, 'epoch': 11.98}
|
611 |
|
612 |
|
613 |
86%|βββββββββ | 9150/10682 [1:18:09<12:35, 2.03it/s]
|
614 |
86%|βββββββββ | 9151/10682 [1:18:09<12:36, 2.02it/s]
|
615 |
86%|βββββββββ | 9152/10682 [1:18:10<12:35, 2.03it/s]
|
616 |
86%|βββββββββ | 9153/10682 [1:18:10<12:35, 2.02it/s]
|
617 |
86%|βββββββββ | 9154/10682 [1:18:11<12:34, 2.02it/s]
|
618 |
86%|βββββββββ | 9155/10682 [1:18:11<12:34, 2.02it/s]
|
619 |
86%|βββββββββ | 9156/10682 [1:18:12<12:33, 2.03it/s]
|
620 |
86%|βββββββββ | 9157/10682 [1:18:12<12:32, 2.03it/s]
|
621 |
86%|βββββββββ | 9158/10682 [1:18:13<12:32, 2.03it/s]
|
622 |
86%|βββββββββ | 9159/10682 [1:18:13<12:31, 2.03it/s]
|
623 |
86%|βββββββββ | 9160/10682 [1:18:14<12:30, 2.03it/s]
|
624 |
86%|βββββββββ | 9161/10682 [1:18:14<12:29, 2.03it/s]
|
625 |
86%|βββββββοΏ½οΏ½οΏ½β | 9162/10682 [1:18:15<12:22, 2.05it/s]
|
626 |
86%|βββββββββ | 9163/10682 [1:18:28<1:45:57, 4.19s/it]
|
627 |
86%|βββββββββ | 9164/10682 [1:18:28<1:17:55, 3.08s/it]
|
628 |
86%|βββββββββ | 9165/10682 [1:18:29<58:14, 2.30s/it]
|
629 |
86%|βββββββββ | 9166/10682 [1:18:29<44:33, 1.76s/it]
|
630 |
86%|βββββββββ | 9167/10682 [1:18:30<34:54, 1.38s/it]
|
631 |
86%|βββββββββ | 9168/10682 [1:18:30<28:08, 1.12s/it]
|
632 |
86%|βββββββββ | 9169/10682 [1:18:31<23:24, 1.08it/s]
|
633 |
86%|βββββββββ | 9170/10682 [1:18:31<20:06, 1.25it/s]
|
634 |
86%|βββββββββ | 9171/10682 [1:18:31<17:47, 1.42it/s]
|
635 |
86%|βββββββββ | 9172/10682 [1:18:32<16:15, 1.55it/s]
|
636 |
86%|βββββββββ | 9173/10682 [1:18:32<15:06, 1.66it/s]
|
637 |
86%|βββββββββ | 9174/10682 [1:18:33<14:17, 1.76it/s]
|
638 |
86%|βββββββββ | 9175/10682 [1:18:33<13:44, 1.83it/s]{'loss': 2.6468, 'grad_norm': 0.26857879757881165, 'learning_rate': 5.94226809108499e-05, 'epoch': 12.02}
|
|
|
639 |
|
640 |
86%|βββββββββ | 9175/10682 [1:18:33<13:44, 1.83it/s]
|
641 |
86%|βββββββββ | 9176/10682 [1:18:34<13:19, 1.88it/s]
|
642 |
86%|βββββββββ | 9177/10682 [1:18:34<13:02, 1.92it/s]
|
643 |
86%|βββββββββ | 9178/10682 [1:18:35<12:49, 1.96it/s]
|
644 |
86%|βββββββββ | 9179/10682 [1:18:35<12:40, 1.98it/s]
|
645 |
86%|βββββββββ | 9180/10682 [1:18:36<12:33, 1.99it/s]
|
646 |
86%|βββββββββ | 9181/10682 [1:18:36<12:28, 2.00it/s]
|
647 |
86%|βββββββββ | 9182/10682 [1:18:37<12:27, 2.01it/s]
|
648 |
86%|βββββββββ | 9183/10682 [1:18:37<12:24, 2.01it/s]
|
649 |
86%|βββββββββ | 9184/10682 [1:18:38<12:23, 2.01it/s]
|
650 |
86%|βββββββββ | 9185/10682 [1:18:38<12:21, 2.02it/s]
|
651 |
86%|βββββββββ | 9186/10682 [1:18:39<12:21, 2.02it/s]
|
652 |
86%|βββββββββ | 9187/10682 [1:18:39<12:19, 2.02it/s]
|
653 |
86%|βββββββββ | 9188/10682 [1:18:40<12:18, 2.02it/s]
|
654 |
86%|βββββββββ | 9189/10682 [1:18:40<12:18, 2.02it/s]
|
655 |
86%|βββββββββ | 9190/10682 [1:18:41<12:19, 2.02it/s]
|
656 |
+
|
657 |
|
658 |
86%|βββββββββ | 9175/10682 [1:18:33<13:44, 1.83it/s]
|
659 |
86%|βββββββββ | 9176/10682 [1:18:34<13:19, 1.88it/s]
|
660 |
86%|βββββββββ | 9177/10682 [1:18:34<13:02, 1.92it/s]
|
661 |
86%|βββββββββ | 9178/10682 [1:18:35<12:49, 1.96it/s]
|
662 |
86%|βββββββββ | 9179/10682 [1:18:35<12:40, 1.98it/s]
|
663 |
86%|βββββββββ | 9180/10682 [1:18:36<12:33, 1.99it/s]
|
664 |
86%|βββββββββ | 9181/10682 [1:18:36<12:28, 2.00it/s]
|
665 |
86%|βββββββββ | 9182/10682 [1:18:37<12:27, 2.01it/s]
|
666 |
86%|βββββββββ | 9183/10682 [1:18:37<12:24, 2.01it/s]
|
667 |
86%|βββββββββ | 9184/10682 [1:18:38<12:23, 2.01it/s]
|
668 |
86%|βββββββββ | 9185/10682 [1:18:38<12:21, 2.02it/s]
|
669 |
86%|βββββββββ | 9186/10682 [1:18:39<12:21, 2.02it/s]
|
670 |
86%|βββββββββ | 9187/10682 [1:18:39<12:19, 2.02it/s]
|
671 |
86%|βββββββββ | 9188/10682 [1:18:40<12:18, 2.02it/s]
|
672 |
86%|βββββββββ | 9189/10682 [1:18:40<12:18, 2.02it/s]
|
673 |
86%|βββββββββ | 9190/10682 [1:18:41<12:19, 2.02it/s]
|
674 |
86%|βββββββββ | 9191/10682 [1:18:41<12:20, 2.01it/s]
|
675 |
86%|βββββββββ | 9192/10682 [1:18:42<12:17, 2.02it/s]
|
676 |
86%|βββββββββ | 9193/10682 [1:18:42<12:15, 2.02it/s]
|
677 |
86%|βββββββββ | 9194/10682 [1:18:43<12:14, 2.03it/s]
|
678 |
86%|βββββββββ | 9195/10682 [1:18:43<12:12, 2.03it/s]
|
679 |
86%|βββββββββ | 9196/10682 [1:18:44<12:12, 2.03it/s]
|
680 |
86%|βββββββββ | 9197/10682 [1:18:44<12:11, 2.03it/s]
|
681 |
86%|βββββββββ | 9198/10682 [1:18:45<12:10, 2.03it/s]
|
682 |
86%|βββββββββ | 9199/10682 [1:18:45<12:10, 2.03it/s]
|
683 |
86%|βββββββββ | 9200/10682 [1:18:46<12:09, 2.03it/s]{'loss': 2.6277, 'grad_norm': 0.2667827308177948, 'learning_rate': 5.7505864256519716e-05, 'epoch': 12.05}
|
684 |
+
|
685 |
|
686 |
86%|βββββββββ | 9200/10682 [1:18:46<12:09, 2.03it/s]
|
687 |
86%|βββββββββ | 9201/10682 [1:18:46<12:09, 2.03it/s]
|
688 |
86%|βββββββββ | 9202/10682 [1:18:47<12:09, 2.03it/s]
|
689 |
86%|βββββββββ | 9203/10682 [1:18:47<12:08, 2.03it/s]
|
690 |
86%|βββββββββ | 9204/10682 [1:18:48<12:09, 2.03it/s]
|
691 |
86%|βββββββββ | 9205/10682 [1:18:48<12:07, 2.03it/s]
|
692 |
86%|βββββββββ | 9206/10682 [1:18:49<12:05, 2.03it/s]
|
693 |
86%|βββββββββ | 9207/10682 [1:18:49<12:05, 2.03it/s]
|
694 |
86%|βββββββββ | 9208/10682 [1:18:50<12:05, 2.03it/s]
|
695 |
86%|βββββββββ | 9209/10682 [1:18:50<12:04, 2.03it/s]
|
696 |
86%|βββββββββ | 9210/10682 [1:18:51<12:04, 2.03it/s]
|
697 |
86%|βββββββββ | 9211/10682 [1:18:51<12:03, 2.03it/s]
|
698 |
86%|βββββββββ | 9212/10682 [1:18:52<12:03, 2.03it/s]
|
699 |
86%|βββββββββ | 9213/10682 [1:18:52<12:03, 2.03it/s]
|
700 |
86%|βββββββββ | 9214/10682 [1:18:53<12:02, 2.03it/s]
|
701 |
86%|βββββββββ | 9215/10682 [1:18:53<12:02, 2.03it/s]
|
702 |
86%|βββββββββ | 9216/10682 [1:18:54<12:01, 2.03it/s]
|
703 |
86%|βββββββββ | 9217/10682 [1:18:54<12:01, 2.03it/s]
|
704 |
86%|βββββββββ | 9218/10682 [1:18:55<12:00, 2.03it/s]
|
705 |
86%|βββββββββ | 9219/10682 [1:18:55<12:00, 2.03it/s]
|
706 |
86%|βββββββββ | 9220/10682 [1:18:56<12:00, 2.03it/s]
|
707 |
86%|βββββββββ | 9221/10682 [1:18:56<12:00, 2.03it/s]
|
708 |
86%|βββββββββ | 9222/10682 [1:18:57<11:59, 2.03it/s]
|
709 |
86%|βββββββββ | 9223/10682 [1:18:57<11:58, 2.03it/s]
|
710 |
86%|βββββββββ | 9224/10682 [1:18:58<11:58, 2.03it/s]
|
711 |
86%|βββββββββ | 9225/10682 [1:18:58<11:58, 2.03it/s]{'loss': 2.6087, 'grad_norm': 0.26708680391311646, 'learning_rate': 5.561858464291258e-05, 'epoch': 12.08}
|
712 |
|
713 |
+
|
714 |
86%|βββββββββ | 9225/10682 [1:18:58<11:58, 2.03it/s]
|
715 |
86%|βββββββββ | 9226/10682 [1:18:59<11:58, 2.03it/s]
|
716 |
86%|βββββββββ | 9227/10682 [1:18:59<11:58, 2.02it/s]
|
717 |
86%|βββββββββ | 9228/10682 [1:19:00<11:57, 2.03it/s]
|
718 |
86%|βββββββββ | 9229/10682 [1:19:00<11:56, 2.03it/s]
|
719 |
86%|βββββββββ | 9230/10682 [1:19:01<11:55, 2.03it/s]
|
720 |
86%|βββββββββ | 9231/10682 [1:19:01<11:54, 2.03it/s]
|
721 |
86%|βββββββββ | 9232/10682 [1:19:02<11:54, 2.03it/s]
|
722 |
86%|βββββββββ | 9233/10682 [1:19:02<11:54, 2.03it/s]
|
723 |
86%|βββββββββ | 9234/10682 [1:19:03<11:53, 2.03it/s]
|
724 |
86%|βββββββββ | 9235/10682 [1:19:03<11:53, 2.03it/s]
|
725 |
86%|βββββββββ | 9236/10682 [1:19:04<11:52, 2.03it/s]
|
726 |
86%|βββββββββ | 9237/10682 [1:19:04<11:52, 2.03it/s]
|
727 |
86%|βββββββββ | 9238/10682 [1:19:05<11:51, 2.03it/s]
|
728 |
86%|βββββββββ | 9239/10682 [1:19:05<11:50, 2.03it/s]
|
729 |
87%|βββββββββ | 9240/10682 [1:19:06<11:50, 2.03it/s]
|
730 |
87%|βββββββββ | 9241/10682 [1:19:06<11:49, 2.03it/s]
|
731 |
87%|βββββββββ | 9242/10682 [1:19:07<11:49, 2.03it/s]
|
732 |
87%|βββββββββ | 9243/10682 [1:19:07<11:48, 2.03it/s]
|
733 |
87%|βββββββββ | 9244/10682 [1:19:07<11:47, 2.03it/s]
|
734 |
87%|βββββββββ | 9245/10682 [1:19:08<11:47, 2.03it/s]
|
735 |
87%|βββββββββ | 9246/10682 [1:19:08<11:47, 2.03it/s]
|
736 |
87%|βββββββββ | 9247/10682 [1:19:09<11:45, 2.03it/s]
|
737 |
87%|βββββββββ | 9248/10682 [1:19:09<11:45, 2.03it/s]
|
738 |
87%|βββββββββ | 9249/10682 [1:19:10<11:45, 2.03it/s]
|
739 |
87%|βββββββββ | 9250/10682 [1:19:10<11:44, 2.03it/s]{'loss': 2.6341, 'grad_norm': 0.2648920714855194, 'learning_rate': 5.3760968048319145e-05, 'epoch': 12.12}
|
740 |
+
|
741 |
|
742 |
87%|βββββββββ | 9250/10682 [1:19:10<11:44, 2.03it/s]
|
743 |
87%|βββββββββ | 9251/10682 [1:19:11<11:45, 2.03it/s]
|
744 |
87%|βββββββββ | 9252/10682 [1:19:11<11:44, 2.03it/s]
|
745 |
87%|βββββββββ | 9253/10682 [1:19:12<11:44, 2.03it/s]
|
746 |
87%|βββββββββ | 9254/10682 [1:19:12<11:43, 2.03it/s]
|
747 |
87%|βββββββββ | 9255/10682 [1:19:13<11:43, 2.03it/s]
|
748 |
87%|βββββββββ | 9256/10682 [1:19:13<11:43, 2.03it/s]
|
749 |
87%|βββββββββ | 9257/10682 [1:19:14<11:42, 2.03it/s]
|
750 |
87%|βββββββββ | 9258/10682 [1:19:14<11:41, 2.03it/s]
|
751 |
87%|βββββββββ | 9259/10682 [1:19:15<11:41, 2.03it/s]
|
752 |
87%|βββββββββ | 9260/10682 [1:19:15<11:40, 2.03it/s]
|
753 |
87%|βββββββββ | 9261/10682 [1:19:16<11:40, 2.03it/s]
|
754 |
87%|βββββββββ | 9262/10682 [1:19:16<11:39, 2.03it/s]
|
755 |
87%|βββββββββ | 9263/10682 [1:19:17<11:39, 2.03it/s]
|
756 |
87%|βββββββββ | 9264/10682 [1:19:17<11:39, 2.03it/s]
|
757 |
87%|βββββββββ | 9265/10682 [1:19:18<11:37, 2.03it/s]
|
758 |
87%|βββββββββ | 9266/10682 [1:19:18<11:37, 2.03it/s]
|
759 |
87%|βββββββββ | 9267/10682 [1:19:19<11:36, 2.03it/s]
|
760 |
87%|βββββββββ | 9268/10682 [1:19:19<11:36, 2.03it/s]
|
761 |
87%|βββββββββ | 9269/10682 [1:19:20<11:36, 2.03it/s]
|
762 |
87%|βββββββββ | 9270/10682 [1:19:20<11:35, 2.03it/s]
|
763 |
87%|βββββββββ | 9271/10682 [1:19:21<11:35, 2.03it/s]
|
764 |
87%|ββββββββοΏ½οΏ½οΏ½ | 9272/10682 [1:19:21<11:34, 2.03it/s]
|
765 |
87%|βββββββββ | 9273/10682 [1:19:22<11:34, 2.03it/s]
|
766 |
87%|βββββββββ | 9274/10682 [1:19:22<11:33, 2.03it/s]
|
767 |
87%|βββββββββ | 9275/10682 [1:19:23<11:33, 2.03it/s]{'loss': 2.6443, 'grad_norm': 0.268455445766449, 'learning_rate': 5.193313847098613e-05, 'epoch': 12.15}
|
768 |
+
|
769 |
|
770 |
87%|βββββββββ | 9275/10682 [1:19:23<11:33, 2.03it/s]
|
771 |
87%|βββββββββ | 9276/10682 [1:19:23<11:33, 2.03it/s]
|
772 |
87%|βββββββββ | 9277/10682 [1:19:24<11:32, 2.03it/s]
|
773 |
87%|βββββββββ | 9278/10682 [1:19:24<11:31, 2.03it/s]
|
774 |
87%|βββββββββ | 9279/10682 [1:19:25<11:31, 2.03it/s]
|
775 |
87%|βββββββββ | 9280/10682 [1:19:25<11:30, 2.03it/s]
|
776 |
87%|βββββββββ | 9281/10682 [1:19:26<11:30, 2.03it/s]
|
777 |
87%|βββββββββ | 9282/10682 [1:19:26<11:30, 2.03it/s]
|
778 |
87%|βββββββββ | 9283/10682 [1:19:27<11:28, 2.03it/s]
|
779 |
87%|βββββββββ | 9284/10682 [1:19:27<11:28, 2.03it/s]
|
780 |
87%|βββββββββ | 9285/10682 [1:19:28<11:28, 2.03it/s]
|
781 |
87%|βββββββββ | 9286/10682 [1:19:28<11:27, 2.03it/s]
|
782 |
87%|βββββββββ | 9287/10682 [1:19:29<11:27, 2.03it/s]
|
783 |
87%|βββββββββ | 9288/10682 [1:19:29<11:27, 2.03it/s]
|
784 |
87%|βββββββββ | 9289/10682 [1:19:30<11:26, 2.03it/s]
|
785 |
87%|βββββββββ | 9290/10682 [1:19:30<11:26, 2.03it/s]
|
786 |
87%|βββββββββ | 9291/10682 [1:19:31<11:25, 2.03it/s]
|
787 |
87%|βββββββββ | 9292/10682 [1:19:31<11:25, 2.03it/s]
|
788 |
87%|βββββββββ | 9293/10682 [1:19:32<11:25, 2.03it/s]
|
789 |
87%|βββββββββ | 9294/10682 [1:19:32<11:24, 2.03it/s]
|
790 |
87%|βββββββββ | 9295/10682 [1:19:33<11:24, 2.03it/s]
|
791 |
87%|βββββββββ | 9296/10682 [1:19:33<11:24, 2.03it/s]
|
792 |
87%|βββββββββ | 9297/10682 [1:19:34<11:23, 2.03it/s]
|
793 |
87%|βββββββββ | 9298/10682 [1:19:34<11:23, 2.03it/s]
|
794 |
87%|βββββββββ | 9299/10682 [1:19:35<11:21, 2.03it/s]
|
795 |
87%|βββββββββ | 9300/10682 [1:19:35<11:21, 2.03it/s]{'loss': 2.6238, 'grad_norm': 0.2692926526069641, 'learning_rate': 5.0135217920839137e-05, 'epoch': 12.18}
|
796 |
+
|
797 |
|
798 |
87%|βββββββββ | 9300/10682 [1:19:35<11:21, 2.03it/s]
|
799 |
87%|βββββββββ | 9301/10682 [1:19:36<11:21, 2.03it/s]
|
800 |
87%|βββββββββ | 9302/10682 [1:19:36<11:21, 2.03it/s]
|
801 |
87%|βββββββββ | 9303/10682 [1:19:37<11:20, 2.03it/s]
|
802 |
87%|βββββββββ | 9304/10682 [1:19:37<11:19, 2.03it/s]
|
803 |
87%|βββββββββ | 9305/10682 [1:19:38<11:19, 2.03it/s]
|
804 |
87%|βββββββββ | 9306/10682 [1:19:38<11:18, 2.03it/s]
|
805 |
87%|βββββββββ | 9307/10682 [1:19:39<11:18, 2.03it/s]
|
806 |
87%|βββββββββ | 9308/10682 [1:19:39<11:17, 2.03it/s]
|
807 |
87%|βββββββββ | 9309/10682 [1:19:40<11:17, 2.03it/s]
|
808 |
87%|βββββββββ | 9310/10682 [1:19:40<11:16, 2.03it/s]
|
809 |
87%|βββββββββ | 9311/10682 [1:19:41<11:15, 2.03it/s]
|
810 |
87%|βββββββββ | 9312/10682 [1:19:41<11:15, 2.03it/s]
|
811 |
87%|βββββββββ | 9313/10682 [1:19:42<11:14, 2.03it/s]
|
812 |
87%|βββββββββ | 9314/10682 [1:19:42<11:14, 2.03it/s]
|
813 |
87%|βββββββββ | 9315/10682 [1:19:42<11:13, 2.03it/s]
|
814 |
87%|βββββββββ | 9316/10682 [1:19:43<11:14, 2.03it/s]
|
815 |
87%|βββββββββ | 9317/10682 [1:19:43<11:13, 2.03it/s]
|
816 |
87%|βββββββββ | 9318/10682 [1:19:44<11:12, 2.03it/s]
|
817 |
87%|βββββββββ | 9319/10682 [1:19:44<11:12, 2.03it/s]
|
818 |
87%|βββββββββ | 9320/10682 [1:19:45<11:11, 2.03it/s]
|
819 |
87%|βββββββββ | 9321/10682 [1:19:45<11:11, 2.03it/s]
|
820 |
87%|βββββββββ | 9322/10682 [1:19:46<11:10, 2.03it/s]
|
821 |
87%|βββββββββ | 9323/10682 [1:19:46<11:09, 2.03it/s]
|
822 |
87%|βββββββββ | 9324/10682 [1:19:47<11:09, 2.03it/s]
|
823 |
87%|βββββββββ | 9325/10682 [1:19:47<11:08, 2.03it/s]{'loss': 2.6351, 'grad_norm': 0.2662162184715271, 'learning_rate': 4.836732641133895e-05, 'epoch': 12.21}
|
824 |
+
|
825 |
|
826 |
87%|βββββββββ | 9325/10682 [1:19:47<11:08, 2.03it/s]
|
827 |
87%|βββββββββ | 9326/10682 [1:19:48<11:09, 2.03it/s]
|
828 |
87%|βββββββββ | 9327/10682 [1:19:48<11:08, 2.03it/s]
|
829 |
87%|βββββββββ | 9328/10682 [1:19:49<11:08, 2.02it/s]
|
830 |
87%|βββββββββ | 9329/10682 [1:19:49<11:07, 2.03it/s]
|
831 |
87%|βββββββββ | 9330/10682 [1:19:50<11:06, 2.03it/s]
|
832 |
87%|βββββββββ | 9331/10682 [1:19:50<11:06, 2.03it/s]
|
833 |
87%|βββββββββ | 9332/10682 [1:19:51<11:05, 2.03it/s]
|
834 |
87%|βββββββββ | 9333/10682 [1:19:51<11:05, 2.03it/s]
|
835 |
87%|βββββββββ | 9334/10682 [1:19:52<11:03, 2.03it/s]
|
836 |
87%|βββββββββ | 9335/10682 [1:19:52<11:04, 2.03it/s]
|
837 |
87%|βββββββββ | 9336/10682 [1:19:53<11:03, 2.03it/s]
|
838 |
87%|βββββββββ | 9337/10682 [1:19:53<11:03, 2.03it/s]
|
839 |
87%|βββββββββ | 9338/10682 [1:19:54<11:03, 2.03it/s]
|
840 |
87%|βββββββββ | 9339/10682 [1:19:54<11:02, 2.03it/s]
|
841 |
87%|βββββββββ | 9340/10682 [1:19:55<11:02, 2.03it/s]
|
842 |
87%|βββββββββ | 9341/10682 [1:19:55<11:02, 2.03it/s]
|
843 |
87%|βββββββββ | 9342/10682 [1:19:56<11:02, 2.02it/s]
|
844 |
87%|βββββββββ | 9343/10682 [1:19:56<11:00, 2.03it/s]
|
845 |
87%|βββββββββ | 9344/10682 [1:19:57<11:00, 2.03it/s]
|
846 |
87%|βββββββββ | 9345/10682 [1:19:57<10:59, 2.03it/s]
|
847 |
87%|βββββββββ | 9346/10682 [1:19:58<10:59, 2.03it/s]
|
848 |
88%|βββββββββ | 9347/10682 [1:19:58<10:58, 2.03it/s]
|
849 |
88%|βββββββββ | 9348/10682 [1:19:59<10:58, 2.03it/s]
|
850 |
88%|βββββββββ | 9349/10682 [1:19:59<10:57, 2.03it/s]
|
851 |
88%|βββββββββ | 9350/10682 [1:20:00<10:56, 2.03it/s]
|
852 |
|
853 |
+
|
854 |
88%|βββββββββ | 9350/10682 [1:20:00<10:56, 2.03it/s]
|
855 |
88%|βββββββββ | 9351/10682 [1:20:00<10:57, 2.02it/s]
|
856 |
88%|βββββββββ | 9352/10682 [1:20:01<10:56, 2.03it/s]
|
857 |
88%|βββββββββ | 9353/10682 [1:20:01<10:55, 2.03it/s]
|
858 |
88%|βββββββββ | 9354/10682 [1:20:02<10:54, 2.03it/s]
|
859 |
88%|βββββββββ | 9355/10682 [1:20:02<10:54, 2.03it/s]
|
860 |
88%|βββββββββ | 9356/10682 [1:20:03<10:54, 2.03it/s]
|
861 |
88%|βββββββββ | 9357/10682 [1:20:03<10:53, 2.03it/s]
|
862 |
88%|βββββββββ | 9358/10682 [1:20:04<10:53, 2.03it/s]
|
863 |
88%|βββββββββ | 9359/10682 [1:20:04<10:52, 2.03it/s]
|
864 |
88%|βββββββββ | 9360/10682 [1:20:05<10:52, 2.03it/s]
|
865 |
88%|βββββββββ | 9361/10682 [1:20:05<10:51, 2.03it/s]
|
866 |
88%|βββββββββ | 9362/10682 [1:20:06<10:51, 2.03it/s]
|
867 |
88%|βββββββββ | 9363/10682 [1:20:06<10:50, 2.03it/s]
|
868 |
88%|βββββββββ | 9364/10682 [1:20:07<10:50, 2.03it/s]
|
869 |
88%|βββββββββ | 9365/10682 [1:20:07<10:50, 2.03it/s]
|
870 |
88%|βββββββββ | 9366/10682 [1:20:08<10:49, 2.03it/s]
|
871 |
88%|βββββββββ | 9367/10682 [1:20:08<10:49, 2.02it/s]
|
872 |
88%|βββββββββ | 9368/10682 [1:20:09<10:49, 2.02it/s]
|
873 |
88%|βββββββββ | 9369/10682 [1:20:09<10:48, 2.02it/s]
|
874 |
88%|βββββββββ | 9370/10682 [1:20:10<10:48, 2.02it/s]
|
875 |
88%|βββββββββ | 9371/10682 [1:20:10<10:46, 2.03it/s]
|
876 |
88%|βββββββββ | 9372/10682 [1:20:11<10:46, 2.03it/s]
|
877 |
88%|βββββββββ | 9373/10682 [1:20:11<10:45, 2.03it/s]
|
878 |
88%|βββββββββ | 9374/10682 [1:20:12<10:45, 2.03it/s]
|
879 |
88%|βββββββββ | 9375/10682 [1:20:12<10:44, 2.03it/s]{'loss': 2.6218, 'grad_norm': 0.26617616415023804, 'learning_rate': 4.492210053786228e-05, 'epoch': 12.28}
|
880 |
+
|
881 |
|
882 |
88%|βββββββββ | 9375/10682 [1:20:12<10:44, 2.03it/s]
|
883 |
88%|βββββββββ | 9376/10682 [1:20:13<10:44, 2.03it/s]
|
884 |
88%|βββββββββ | 9377/10682 [1:20:13<10:44, 2.03it/s]
|
885 |
88%|βββββββββ | 9378/10682 [1:20:14<10:43, 2.03it/s]
|
886 |
88%|βββββββββ | 9379/10682 [1:20:14<10:42, 2.03it/s]
|
887 |
88%|βββββββββ | 9380/10682 [1:20:15<10:42, 2.03it/s]
|
888 |
88%|βββββββββ | 9381/10682 [1:20:15<10:42, 2.03it/s]
|
889 |
88%|βββββββββ | 9382/10682 [1:20:16<10:41, 2.03it/s]
|
890 |
88%|βββββββββ | 9383/10682 [1:20:16<10:40, 2.03it/s]
|
891 |
88%|βββββββββ | 9384/10682 [1:20:17<10:40, 2.03it/s]
|
892 |
88%|βββββββββ | 9385/10682 [1:20:17<10:39, 2.03it/s]
|
893 |
88%|βββββββββ | 9386/10682 [1:20:18<10:39, 2.03it/s]
|
894 |
88%|βββββββββ | 9387/10682 [1:20:18<10:38, 2.03it/s]
|
895 |
88%|βββββββββ | 9388/10682 [1:20:19<10:38, 2.03it/s]
|
896 |
88%|βββββββββ | 9389/10682 [1:20:19<10:37, 2.03it/s]
|
897 |
88%|βββββββββ | 9390/10682 [1:20:19<10:37, 2.03it/s]
|
898 |
88%|βββββββββ | 9391/10682 [1:20:20<10:36, 2.03it/s]
|
899 |
88%|βββββββββ | 9392/10682 [1:20:20<10:36, 2.03it/s]
|
900 |
88%|βββββββββ | 9393/10682 [1:20:21<10:36, 2.03it/s]
|
901 |
88%|βββββββββ | 9394/10682 [1:20:21<10:36, 2.02it/s]
|
902 |
88%|βββββββββ | 9395/10682 [1:20:22<10:35, 2.02it/s]
|
903 |
88%|βββββββββ | 9396/10682 [1:20:22<10:35, 2.02it/s]
|
904 |
88%|βββββββββ | 9397/10682 [1:20:23<10:34, 2.03it/s]
|
905 |
88%|βββββββββ | 9398/10682 [1:20:23<10:33, 2.03it/s]
|
906 |
88%|βββββββββ | 9399/10682 [1:20:24<10:32, 2.03it/s]
|
907 |
88%|βββββββββ | 9400/10682 [1:20:24<10:32, 2.03it/s]{'loss': 2.6316, 'grad_norm': 0.2690180540084839, 'learning_rate': 4.3244996147050855e-05, 'epoch': 12.31}
|
908 |
|
909 |
+
|
910 |
88%|βββββββββ | 9400/10682 [1:20:24<10:32, 2.03it/s]
|
911 |
88%|βββββββββ | 9401/10682 [1:20:25<10:32, 2.02it/s]
|
912 |
88%|βββββββββ | 9402/10682 [1:20:25<10:31, 2.03it/s]
|
913 |
88%|βββββββββ | 9403/10682 [1:20:26<10:31, 2.03it/s]
|
914 |
88%|βββββββββ | 9404/10682 [1:20:26<10:30, 2.03it/s]
|
915 |
88%|βββββββββ | 9405/10682 [1:20:27<10:29, 2.03it/s]
|
916 |
88%|βββββββββ | 9406/10682 [1:20:27<10:29, 2.03it/s]
|
917 |
88%|βββββββββ | 9407/10682 [1:20:28<10:28, 2.03it/s]
|
918 |
88%|βββββββββ | 9408/10682 [1:20:28<10:28, 2.03it/s]
|
919 |
88%|βββββββββ | 9409/10682 [1:20:29<10:27, 2.03it/s]
|
920 |
88%|βββββββββ | 9410/10682 [1:20:29<10:27, 2.03it/s]
|
921 |
88%|βββββββββ | 9411/10682 [1:20:30<10:27, 2.03it/s]
|
922 |
88%|βββββββββ | 9412/10682 [1:20:30<10:26, 2.03it/s]
|
923 |
88%|βββββββββ | 9413/10682 [1:20:31<10:25, 2.03it/s]
|
924 |
88%|βββββββββ | 9414/10682 [1:20:31<10:25, 2.03it/s]
|
925 |
88%|βββββββββ | 9415/10682 [1:20:32<10:24, 2.03it/s]
|
926 |
88%|βββββββββ | 9416/10682 [1:20:32<10:23, 2.03it/s]
|
927 |
88%|βββββββββ | 9417/10682 [1:20:33<10:23, 2.03it/s]
|
928 |
88%|βββββββββ | 9418/10682 [1:20:33<10:22, 2.03it/s]
|
929 |
88%|βββββββββ | 9419/10682 [1:20:34<10:22, 2.03it/s]
|
930 |
88%|βββββββββ | 9420/10682 [1:20:34<10:22, 2.03it/s]
|
931 |
88%|βββββββββ | 9421/10682 [1:20:35<10:21, 2.03it/s]
|
932 |
88%|βββββββββ | 9422/10682 [1:20:35<10:20, 2.03it/s]
|
933 |
88%|βββββββββ | 9423/10682 [1:20:36<10:20, 2.03it/s]
|
934 |
88%|βββββββββ | 9424/10682 [1:20:36<10:19, 2.03it/s]
|
935 |
88%|βββββββββ | 9425/10682 [1:20:37<10:18, 2.03it/s]{'loss': 2.629, 'grad_norm': 0.26739317178726196, 'learning_rate': 4.1598380727865315e-05, 'epoch': 12.34}
|
936 |
+
|
937 |
|
938 |
88%|βββββββββ | 9425/10682 [1:20:37<10:18, 2.03it/s]
|
939 |
88%|βββββββββ | 9426/10682 [1:20:37<10:19, 2.03it/s]
|
940 |
88%|βββββββββ | 9427/10682 [1:20:38<10:18, 2.03it/s]
|
941 |
88%|βββββββββ | 9428/10682 [1:20:38<10:18, 2.03it/s]
|
942 |
88%|βββββββββ | 9429/10682 [1:20:39<10:17, 2.03it/s]
|
943 |
88%|βββββββββ | 9430/10682 [1:20:39<10:16, 2.03it/s]
|
944 |
88%|βββββββββ | 9431/10682 [1:20:40<10:15, 2.03it/s]
|
945 |
88%|βββββββββ | 9432/10682 [1:20:40<10:15, 2.03it/s]
|
946 |
88%|βββββββββ | 9433/10682 [1:20:41<10:14, 2.03it/s]
|
947 |
88%|βββββββββ | 9434/10682 [1:20:41<10:14, 2.03it/s]
|
948 |
88%|βββββββββ | 9435/10682 [1:20:42<10:14, 2.03it/s]
|
949 |
88%|βββββββββ | 9436/10682 [1:20:42<10:13, 2.03it/s]
|
950 |
88%|βββββββββ | 9437/10682 [1:20:43<10:12, 2.03it/s]
|
951 |
88%|βββββββββ | 9438/10682 [1:20:43<10:12, 2.03it/s]
|
952 |
88%|βββββββββ | 9439/10682 [1:20:44<10:12, 2.03it/s]
|
953 |
88%|βββββββββ | 9440/10682 [1:20:44<10:12, 2.03it/s]
|
954 |
88%|βββββββββ | 9441/10682 [1:20:45<10:11, 2.03it/s]
|
955 |
88%|βββββββββ | 9442/10682 [1:20:45<10:10, 2.03it/s]
|
956 |
88%|βββββββββ | 9443/10682 [1:20:46<10:09, 2.03it/s]
|
957 |
88%|βββββββββ | 9444/10682 [1:20:46<10:09, 2.03it/s]
|
958 |
88%|βββββββββ | 9445/10682 [1:20:47<10:08, 2.03it/s]
|
959 |
88%|βββββββββ | 9446/10682 [1:20:47<10:08, 2.03it/s]
|
960 |
88%|βββββββββ | 9447/10682 [1:20:48<10:08, 2.03it/s]
|
961 |
88%|βββββββββ | 9448/10682 [1:20:48<10:07, 2.03it/s]
|
962 |
88%|βββββββββ | 9449/10682 [1:20:49<10:07, 2.03it/s]
|
963 |
88%|βββββββββ | 9450/10682 [1:20:49<10:06, 2.03it/s]
|
964 |
|
965 |
+
|
966 |
88%|βββββββββ | 9450/10682 [1:20:49<10:06, 2.03it/s]
|
967 |
88%|βββββββββ | 9451/10682 [1:20:50<10:07, 2.03it/s]
|
968 |
88%|βββββββββ | 9452/10682 [1:20:50<10:06, 2.03it/s]
|
969 |
88%|βββββββββ | 9453/10682 [1:20:51<10:05, 2.03it/s]
|
970 |
89%|βββββββββ | 9454/10682 [1:20:51<10:05, 2.03it/s]
|
971 |
89%|βββββββββ | 9455/10682 [1:20:52<10:04, 2.03it/s]
|
972 |
89%|βββββββββ | 9456/10682 [1:20:52<10:04, 2.03it/s]
|
973 |
89%|βββββββββ | 9457/10682 [1:20:53<10:03, 2.03it/s]
|
974 |
89%|βββββββββ | 9458/10682 [1:20:53<10:03, 2.03it/s]
|
975 |
89%|βββββββββ | 9459/10682 [1:20:53<10:02, 2.03it/s]
|
976 |
89%|βββββββββ | 9460/10682 [1:20:54<10:02, 2.03it/s]
|
977 |
89%|βββββββββ | 9461/10682 [1:20:54<10:01, 2.03it/s]
|
978 |
89%|βββββββββ | 9462/10682 [1:20:55<10:01, 2.03it/s]
|
979 |
89%|βββββββββ | 9463/10682 [1:20:55<10:00, 2.03it/s]
|
980 |
89%|βββββββββ | 9464/10682 [1:20:56<09:59, 2.03it/s]
|
981 |
89%|βββββββββ | 9465/10682 [1:20:56<09:59, 2.03it/s]
|
982 |
89%|βββββββββ | 9466/10682 [1:20:57<09:59, 2.03it/s]
|
983 |
89%|βββββββββ | 9467/10682 [1:20:57<09:58, 2.03it/s]
|
984 |
89%|βββββββββ | 9468/10682 [1:20:58<09:58, 2.03it/s]
|
985 |
89%|βββββββββ | 9469/10682 [1:20:58<09:57, 2.03it/s]
|
986 |
89%|βββββββββ | 9470/10682 [1:20:59<09:57, 2.03it/s]
|
987 |
89%|βββββββββ | 9471/10682 [1:20:59<09:56, 2.03it/s]
|
988 |
89%|βββββββββ | 9472/10682 [1:21:00<09:55, 2.03it/s]
|
989 |
89%|βββββββββ | 9473/10682 [1:21:00<09:55, 2.03it/s]
|
990 |
89%|βββββββββ | 9474/10682 [1:21:01<09:54, 2.03it/s]
|
991 |
89%|βββββββββ | 9475/10682 [1:21:01<09:54, 2.03it/s]{'loss': 2.6416, 'grad_norm': 0.26726436614990234, 'learning_rate': 3.839705441646779e-05, 'epoch': 12.41}
|
992 |
+
|
993 |
|
994 |
89%|βββββββββ | 9475/10682 [1:21:01<09:54, 2.03it/s]
|
995 |
89%|βββββββββ | 9476/10682 [1:21:02<09:54, 2.03it/s]
|
996 |
89%|βββββββββ | 9477/10682 [1:21:02<09:53, 2.03it/s]
|
997 |
89%|βββββββββ | 9478/10682 [1:21:03<09:53, 2.03it/s]
|
998 |
89%|βββββββββ | 9479/10682 [1:21:03<09:52, 2.03it/s]
|
999 |
89%|βββββββββ | 9480/10682 [1:21:04<09:51, 2.03it/s]
|
1000 |
89%|βββββββββ | 9481/10682 [1:21:04<09:51, 2.03it/s]
|
1001 |
89%|βββββββββ | 9482/10682 [1:21:05<09:51, 2.03it/s]
|
1002 |
89%|βββββββββ | 9483/10682 [1:21:05<09:50, 2.03it/s]
|
1003 |
89%|βββββββββ | 9484/10682 [1:21:06<09:49, 2.03it/s]
|
1004 |
89%|βββββββββ | 9485/10682 [1:21:06<09:50, 2.03it/s]
|
1005 |
89%|βββββββββ | 9486/10682 [1:21:07<09:49, 2.03it/s]
|
1006 |
89%|βββββββββ | 9487/10682 [1:21:07<09:49, 2.03it/s]
|
1007 |
89%|βββββββββ | 9488/10682 [1:21:08<09:48, 2.03it/s]
|
1008 |
89%|βββββββββ | 9489/10682 [1:21:08<09:47, 2.03it/s]
|
1009 |
89%|βββββββββ | 9490/10682 [1:21:09<09:47, 2.03it/s]
|
1010 |
89%|βββββββββ | 9491/10682 [1:21:09<09:46, 2.03it/s]
|
1011 |
89%|βββββββββ | 9492/10682 [1:21:10<09:45, 2.03it/s]
|
1012 |
89%|βββββββββ | 9493/10682 [1:21:10<09:45, 2.03it/s]
|
1013 |
89%|βββββββββ | 9494/10682 [1:21:11<09:44, 2.03it/s]
|
1014 |
89%|βββββββββ | 9495/10682 [1:21:11<09:45, 2.03it/s]
|
1015 |
89%|βββββββββ | 9496/10682 [1:21:12<09:44, 2.03it/s]
|
1016 |
89%|βββββββββ | 9497/10682 [1:21:12<09:43, 2.03it/s]
|
1017 |
89%|βββββββββ | 9498/10682 [1:21:13<09:43, 2.03it/s]
|
1018 |
89%|βββββββββ | 9499/10682 [1:21:13<09:41, 2.03it/s]
|
1019 |
89%|βββββββββ | 9500/10682 [1:21:14<09:41, 2.03it/s]
|
1020 |
|
1021 |
+
|
1022 |
89%|βββββββββ | 9500/10682 [1:21:14<09:41, 2.03it/s]
|
1023 |
89%|βββββββββ | 9501/10682 [1:21:14<09:41, 2.03it/s]
|
1024 |
89%|βββββββββ | 9502/10682 [1:21:15<09:40, 2.03it/s]
|
1025 |
89%|βββββββββ | 9503/10682 [1:21:15<09:40, 2.03it/s]
|
1026 |
89%|βββββββββ | 9504/10682 [1:21:16<09:40, 2.03it/s]
|
1027 |
89%|βββββββββ | 9505/10682 [1:21:16<09:40, 2.03it/s]
|
1028 |
89%|βββββββββ | 9506/10682 [1:21:17<09:39, 2.03it/s]
|
1029 |
89%|βββββββββ | 9507/10682 [1:21:17<09:39, 2.03it/s]
|
1030 |
89%|βββββββββ | 9508/10682 [1:21:18<09:39, 2.03it/s]
|
1031 |
89%|βββββββββ | 9509/10682 [1:21:18<09:38, 2.03it/s]
|
1032 |
89%|βββββββββ | 9510/10682 [1:21:19<09:37, 2.03it/s]
|
1033 |
89%|βββββββββ | 9511/10682 [1:21:19<09:36, 2.03it/s]
|
1034 |
89%|βββββββββ | 9512/10682 [1:21:20<09:36, 2.03it/s]
|
1035 |
89%|βββββββββ | 9513/10682 [1:21:20<09:35, 2.03it/s]
|
1036 |
89%|βββββββββ | 9514/10682 [1:21:21<09:35, 2.03it/s]
|
1037 |
89%|βββββββββ | 9515/10682 [1:21:21<09:34, 2.03it/s]
|
1038 |
89%|βββββββββ | 9516/10682 [1:21:22<09:34, 2.03it/s]
|
1039 |
89%|βββββββββ | 9517/10682 [1:21:22<09:34, 2.03it/s]
|
1040 |
89%|βββββββββ | 9518/10682 [1:21:23<10:24, 1.86it/s]
|
1041 |
89%|βββββββββ | 9519/10682 [1:21:23<10:08, 1.91it/s]
|
1042 |
89%|βββββββββ | 9520/10682 [1:21:24<09:57, 1.94it/s]
|
1043 |
89%|βββββββββ | 9521/10682 [1:21:24<09:49, 1.97it/s]
|
1044 |
89%|βββββββββ | 9522/10682 [1:21:25<09:43, 1.99it/s]
|
1045 |
89%|βββββββββ | 9523/10682 [1:21:25<09:40, 2.00it/s]
|
1046 |
89%|βββββββββ | 9524/10682 [1:21:26<09:36, 2.01it/s]
|
1047 |
89%|βββββββββ | 9525/10682 [1:21:26<09:34, 2.01it/s]{'loss': 2.6339, 'grad_norm': 0.2671761214733124, 'learning_rate': 3.531897635965431e-05, 'epoch': 12.48}
|
1048 |
+
|
1049 |
|
1050 |
89%|βββββββββ | 9525/10682 [1:21:26<09:34, 2.01it/s]
|
1051 |
89%|βββββββββ | 9526/10682 [1:21:27<09:33, 2.02it/s]
|
1052 |
89%|βββββββββ | 9527/10682 [1:21:27<09:32, 2.02it/s]
|
1053 |
89%|βββββββββ | 9528/10682 [1:21:28<09:30, 2.02it/s]
|
1054 |
89%|βββββββββ | 9529/10682 [1:21:28<09:29, 2.02it/s]
|
1055 |
89%|βββββββββ | 9530/10682 [1:21:29<09:29, 2.02it/s]
|
1056 |
89%|βββββββββ | 9531/10682 [1:21:29<09:27, 2.03it/s]
|
1057 |
89%|βββββββββ | 9532/10682 [1:21:30<09:28, 2.02it/s]
|
1058 |
89%|βββββββββ | 9533/10682 [1:21:30<09:26, 2.03it/s]
|
1059 |
89%|βββββββββ | 9534/10682 [1:21:31<09:26, 2.03it/s]
|
1060 |
89%|βββββββββ | 9535/10682 [1:21:31<09:25, 2.03it/s]
|
1061 |
89%|βββββββββ | 9536/10682 [1:21:32<09:25, 2.03it/s]
|
1062 |
89%|βββββββββ | 9537/10682 [1:21:32<09:24, 2.03it/s]
|
1063 |
89%|βββββββββ | 9538/10682 [1:21:33<09:23, 2.03it/s]
|
1064 |
89%|βββββββββ | 9539/10682 [1:21:33<09:23, 2.03it/s]
|
1065 |
89%|βββββββββ | 9540/10682 [1:21:34<09:23, 2.03it/s]
|
1066 |
89%|βββββββββ | 9541/10682 [1:21:34<09:23, 2.03it/s]
|
1067 |
89%|βββββββββ | 9542/10682 [1:21:35<09:22, 2.03it/s]
|
1068 |
89%|βββββββββ | 9543/10682 [1:21:35<09:21, 2.03it/s]
|
1069 |
89%|βββββββββ | 9544/10682 [1:21:36<09:20, 2.03it/s]
|
1070 |
89%|βββββββββ | 9545/10682 [1:21:36<09:20, 2.03it/s]
|
1071 |
89%|βββββββββ | 9546/10682 [1:21:37<09:20, 2.03it/s]
|
1072 |
89%|βββββββββ | 9547/10682 [1:21:37<09:19, 2.03it/s]
|
1073 |
89%|βββββββββ | 9548/10682 [1:21:38<09:19, 2.03it/s]
|
1074 |
89%|βββββββββ | 9549/10682 [1:21:38<09:18, 2.03it/s]
|
1075 |
89%|βββββββββ | 9550/10682 [1:21:38<09:18, 2.03it/s]{'loss': 2.6398, 'grad_norm': 0.2684043049812317, 'learning_rate': 3.382641354591731e-05, 'epoch': 12.51}
|
1076 |
+
|
1077 |
|
1078 |
89%|βββββββββ | 9550/10682 [1:21:38<09:18, 2.03it/s]
|
1079 |
89%|βββββββββ | 9551/10682 [1:21:39<09:18, 2.03it/s]
|
1080 |
89%|βββββββββ | 9552/10682 [1:21:39<09:17, 2.03it/s]
|
1081 |
89%|βββββββββ | 9553/10682 [1:21:40<09:17, 2.02it/s]
|
1082 |
89%|βββββββββ | 9554/10682 [1:21:40<09:16, 2.03it/s]
|
1083 |
89%|βββββββββ | 9555/10682 [1:21:41<09:15, 2.03it/s]
|
1084 |
89%|βββββββββ | 9556/10682 [1:21:41<09:15, 2.03it/s]
|
1085 |
89%|βββββββββ | 9557/10682 [1:21:42<09:14, 2.03it/s]
|
1086 |
89%|βββββββββ | 9558/10682 [1:21:42<09:13, 2.03it/s]
|
1087 |
89%|βββββββββ | 9559/10682 [1:21:43<09:14, 2.03it/s]
|
1088 |
89%|βββββββββ | 9560/10682 [1:21:43<09:13, 2.03it/s]
|
1089 |
90%|βββββββββ | 9561/10682 [1:21:44<09:12, 2.03it/s]
|
1090 |
90%|βββββββββ | 9562/10682 [1:21:44<09:12, 2.03it/s]
|
1091 |
90%|βββββββββ | 9563/10682 [1:21:45<09:11, 2.03it/s]
|
1092 |
90%|βββββββββ | 9564/10682 [1:21:45<09:11, 2.03it/s]
|
1093 |
90%|βββββββββ | 9565/10682 [1:21:46<09:11, 2.03it/s]
|
1094 |
90%|βββββββββ | 9566/10682 [1:21:46<09:11, 2.02it/s]
|
1095 |
90%|βββββββββ | 9567/10682 [1:21:47<09:10, 2.02it/s]
|
1096 |
90%|βββββββββ | 9568/10682 [1:21:47<09:10, 2.02it/s]
|
1097 |
90%|βββββββββ | 9569/10682 [1:21:48<09:09, 2.02it/s]
|
1098 |
90%|βββββββββ | 9570/10682 [1:21:48<09:09, 2.03it/s]
|
1099 |
90%|βββββββββ | 9571/10682 [1:21:49<09:08, 2.02it/s]
|
1100 |
90%|βββββββββ | 9572/10682 [1:21:49<09:08, 2.03it/s]
|
1101 |
90%|βββββββββ | 9573/10682 [1:21:50<09:07, 2.03it/s]
|
1102 |
90%|βββββββββ | 9574/10682 [1:21:50<09:06, 2.03it/s]
|
1103 |
90%|βββββββββ | 9575/10682 [1:21:51<09:06, 2.03it/s]{'loss': 2.6295, 'grad_norm': 0.2759605348110199, 'learning_rate': 3.2364968406054075e-05, 'epoch': 12.54}
|
1104 |
+
|
1105 |
|
1106 |
90%|βββββββββ | 9575/10682 [1:21:51<09:06, 2.03it/s]
|
1107 |
90%|βββββββββ | 9576/10682 [1:21:51<09:06, 2.02it/s]
|
1108 |
90%|βββββββββ | 9577/10682 [1:21:52<09:05, 2.02it/s]
|
1109 |
90%|βββββββββ | 9578/10682 [1:21:52<09:05, 2.02it/s]
|
1110 |
90%|βββββββββ | 9579/10682 [1:21:53<09:04, 2.03it/s]
|
1111 |
90%|βββββββββ | 9580/10682 [1:21:53<09:03, 2.03it/s]
|
1112 |
90%|βββββββββ | 9581/10682 [1:21:54<09:02, 2.03it/s]
|
1113 |
90%|βββββββββ | 9582/10682 [1:21:54<09:02, 2.03it/s]
|
1114 |
90%|βββββββββ | 9583/10682 [1:21:55<09:02, 2.03it/s]
|
1115 |
90%|βββββββββ | 9584/10682 [1:21:55<09:01, 2.03it/s]
|
1116 |
90%|βββββββββ | 9585/10682 [1:21:56<09:01, 2.03it/s]
|
1117 |
90%|βββββββββ | 9586/10682 [1:21:56<09:00, 2.03it/s]
|
1118 |
90%|βββββββββ | 9587/10682 [1:21:57<09:00, 2.03it/s]
|
1119 |
90%|βββββββββ | 9588/10682 [1:21:57<08:59, 2.03it/s]
|
1120 |
90%|βββββββββ | 9589/10682 [1:21:58<08:59, 2.03it/s]
|
1121 |
90%|βββββββββ | 9590/10682 [1:21:58<08:58, 2.03it/s]
|
1122 |
90%|βββββββββ | 9591/10682 [1:21:59<08:57, 2.03it/s]
|
1123 |
90%|βββββββββ | 9592/10682 [1:21:59<08:57, 2.03it/s]
|
1124 |
90%|βββββββββ | 9593/10682 [1:22:00<08:56, 2.03it/s]
|
1125 |
90%|βββββββββ | 9594/10682 [1:22:00<08:57, 2.03it/s]
|
1126 |
90%|βββββββββ | 9595/10682 [1:22:01<08:56, 2.03it/s]
|
1127 |
90%|βββββββββ | 9596/10682 [1:22:01<08:56, 2.03it/s]
|
1128 |
90%|βββββββββ | 9597/10682 [1:22:02<08:55, 2.03it/s]
|
1129 |
90%|βββββββββ | 9598/10682 [1:22:02<08:54, 2.03it/s]
|
1130 |
90%|βββββββββ | 9599/10682 [1:22:03<08:53, 2.03it/s]
|
1131 |
90%|βββββββββ | 9600/10682 [1:22:03<08:53, 2.03it/s]{'loss': 2.6293, 'grad_norm': 0.2649473547935486, 'learning_rate': 3.093473849336781e-05, 'epoch': 12.57}
|
1132 |
+
|
1133 |
|
1134 |
90%|βββββββββ | 9600/10682 [1:22:03<08:53, 2.03it/s]
|
1135 |
90%|βββββββββ | 9601/10682 [1:22:04<08:54, 2.02it/s]
|
1136 |
90%|βββββββββ | 9602/10682 [1:22:04<08:53, 2.02it/s]
|
1137 |
90%|βββββββββ | 9603/10682 [1:22:05<08:52, 2.02it/s]
|
1138 |
90%|βββββββββ | 9604/10682 [1:22:05<08:51, 2.03it/s]
|
1139 |
90%|βββββββββ | 9605/10682 [1:22:06<08:51, 2.03it/s]
|
1140 |
90%|βββββββββ | 9606/10682 [1:22:06<08:50, 2.03it/s]
|
1141 |
90%|βββββββββ | 9607/10682 [1:22:07<08:50, 2.03it/s]
|
1142 |
90%|βββββββββ | 9608/10682 [1:22:07<08:49, 2.03it/s]
|
1143 |
90%|βββββββββ | 9609/10682 [1:22:08<08:49, 2.03it/s]
|
1144 |
90%|βββββββββ | 9610/10682 [1:22:08<08:49, 2.03it/s]
|
1145 |
90%|βββββββββ | 9611/10682 [1:22:09<09:33, 1.87it/s]
|
1146 |
90%|βββββββββ | 9612/10682 [1:22:09<09:19, 1.91it/s]
|
1147 |
90%|βββββββββ | 9613/10682 [1:22:10<09:09, 1.95it/s]
|
1148 |
90%|βββββββββ | 9614/10682 [1:22:10<09:01, 1.97it/s]
|
1149 |
90%|βββββββββ | 9615/10682 [1:22:11<08:56, 1.99it/s]
|
1150 |
90%|βββββββββ | 9616/10682 [1:22:11<08:52, 2.00it/s]
|
1151 |
90%|βββββββββ | 9617/10682 [1:22:12<08:50, 2.01it/s]
|
1152 |
90%|βββββββββ | 9618/10682 [1:22:12<08:48, 2.01it/s]
|
1153 |
90%|βββββββββ | 9619/10682 [1:22:13<08:46, 2.02it/s]
|
1154 |
90%|βββββββββ | 9620/10682 [1:22:13<08:45, 2.02it/s]
|
1155 |
90%|βββββββββ | 9621/10682 [1:22:14<08:44, 2.02it/s]
|
1156 |
90%|βββββββββ | 9622/10682 [1:22:14<08:44, 2.02it/s]
|
1157 |
90%|βββββββββ | 9623/10682 [1:22:15<08:43, 2.02it/s]
|
1158 |
90%|βββββββββ | 9624/10682 [1:22:15<08:42, 2.02it/s]
|
1159 |
90%|βββββββββ | 9625/10682 [1:22:16<08:41, 2.03it/s]{'loss': 2.6392, 'grad_norm': 0.26671603322029114, 'learning_rate': 2.9535819277506203e-05, 'epoch': 12.61}
|
1160 |
+
|
1161 |
|
1162 |
90%|βββββββββ | 9625/10682 [1:22:16<08:41, 2.03it/s]
|
1163 |
90%|βββββββββ | 9626/10682 [1:22:16<08:41, 2.02it/s]
|
1164 |
90%|βββββββββ | 9627/10682 [1:22:17<08:40, 2.03it/s]
|
1165 |
90%|βββββββββ | 9628/10682 [1:22:17<08:40, 2.03it/s]
|
1166 |
90%|βββββββββ | 9629/10682 [1:22:18<08:39, 2.03it/s]
|
1167 |
90%|βββββββββ | 9630/10682 [1:22:18<08:39, 2.03it/s]
|
1168 |
90%|βββββββββ | 9631/10682 [1:22:19<08:38, 2.03it/s]
|
1169 |
90%|βββββββββ | 9632/10682 [1:22:19<08:38, 2.03it/s]
|
1170 |
90%|βββββββββ | 9633/10682 [1:22:20<08:37, 2.03it/s]
|
1171 |
90%|βββββββββ | 9634/10682 [1:22:20<08:37, 2.03it/s]
|
1172 |
90%|βββββββββ | 9635/10682 [1:22:21<08:36, 2.03it/s]
|
1173 |
90%|βββββββββ | 9636/10682 [1:22:21<08:35, 2.03it/s]
|
1174 |
90%|βββββββββ | 9637/10682 [1:22:22<08:35, 2.03it/s]
|
1175 |
90%|βββββββββ | 9638/10682 [1:22:22<08:35, 2.03it/s]
|
1176 |
90%|βββββββββ | 9639/10682 [1:22:23<08:34, 2.03it/s]
|
1177 |
90%|βββββββββ | 9640/10682 [1:22:23<08:34, 2.03it/s]
|
1178 |
90%|βββββββββ | 9641/10682 [1:22:24<08:33, 2.03it/s]
|
1179 |
90%|βββββββββ | 9642/10682 [1:22:24<08:32, 2.03it/s]
|
1180 |
90%|βββββββββ | 9643/10682 [1:22:25<08:32, 2.03it/s]
|
1181 |
90%|βββββββββ | 9644/10682 [1:22:25<08:31, 2.03it/s]
|
1182 |
90%|βββββββββ | 9645/10682 [1:22:26<08:31, 2.03it/s]
|
1183 |
90%|βββββββββ | 9646/10682 [1:22:26<08:30, 2.03it/s]
|
1184 |
90%|βββββββββ | 9647/10682 [1:22:26<08:29, 2.03it/s]
|
1185 |
90%|βββββββββ | 9648/10682 [1:22:27<08:29, 2.03it/s]
|
1186 |
90%|βββββββββ | 9649/10682 [1:22:27<08:28, 2.03it/s]
|
1187 |
90%|βββββββββ | 9650/10682 [1:22:28<08:27, 2.03it/s]{'loss': 2.6413, 'grad_norm': 0.27070415019989014, 'learning_rate': 2.8168304138088295e-05, 'epoch': 12.64}
|
1188 |
|
1189 |
+
|
1190 |
90%|βββββββββ | 9650/10682 [1:22:28<08:27, 2.03it/s]
|
1191 |
90%|βββββββββ | 9651/10682 [1:22:28<08:28, 2.03it/s]
|
1192 |
90%|βββββββββ | 9652/10682 [1:22:29<08:27, 2.03it/s]
|
1193 |
90%|βββββββββ | 9653/10682 [1:22:29<08:27, 2.03it/s]
|
1194 |
90%|βββββββββ | 9654/10682 [1:22:30<08:26, 2.03it/s]
|
1195 |
90%|βββββββββ | 9655/10682 [1:22:30<08:26, 2.03it/s]
|
1196 |
90%|βββββββββ | 9656/10682 [1:22:31<08:25, 2.03it/s]
|
1197 |
90%|βββββββββ | 9657/10682 [1:22:31<08:25, 2.03it/s]
|
1198 |
90%|βββββββββ | 9658/10682 [1:22:32<08:25, 2.03it/s]
|
1199 |
90%|βββββββββ | 9659/10682 [1:22:32<08:24, 2.03it/s]
|
1200 |
90%|βββββββββ | 9660/10682 [1:22:33<08:24, 2.02it/s]
|
1201 |
90%|βββββββββ | 9661/10682 [1:22:33<08:24, 2.03it/s]
|
1202 |
90%|βββββββββ | 9662/10682 [1:22:34<08:24, 2.02it/s]
|
1203 |
90%|βββββββββ | 9663/10682 [1:22:34<08:23, 2.02it/s]
|
1204 |
90%|βββββββββ | 9664/10682 [1:22:35<08:23, 2.02it/s]
|
1205 |
90%|βββββββββ | 9665/10682 [1:22:35<08:23, 2.02it/s]
|
1206 |
90%|βββββββββ | 9666/10682 [1:22:36<08:22, 2.02it/s]
|
1207 |
90%|βββββββββ | 9667/10682 [1:22:36<08:21, 2.02it/s]
|
1208 |
91%|βββββββββ | 9668/10682 [1:22:37<08:21, 2.02it/s]
|
1209 |
91%|βββββββββ | 9669/10682 [1:22:37<08:20, 2.03it/s]
|
1210 |
91%|βββββββββ | 9670/10682 [1:22:38<08:19, 2.03it/s]
|
1211 |
91%|βββββββββ | 9671/10682 [1:22:38<08:19, 2.03it/s]
|
1212 |
91%|βββββββββ | 9672/10682 [1:22:39<08:18, 2.03it/s]
|
1213 |
91%|βββββββββ | 9673/10682 [1:22:39<08:17, 2.03it/s]
|
1214 |
91%|βββββββββ | 9674/10682 [1:22:40<08:16, 2.03it/s]
|
1215 |
91%|βββββββββ | 9675/10682 [1:22:40<08:16, 2.03it/s]{'loss': 2.6238, 'grad_norm': 0.26584649085998535, 'learning_rate': 2.6832284358471516e-05, 'epoch': 12.67}
|
1216 |
+
|
1217 |
|
1218 |
91%|βββββββββ | 9675/10682 [1:22:40<08:16, 2.03it/s]
|
1219 |
91%|βββββββββ | 9676/10682 [1:22:41<08:16, 2.02it/s]
|
1220 |
91%|βββββββββ | 9677/10682 [1:22:41<08:16, 2.02it/s]
|
1221 |
91%|βββββββββ | 9678/10682 [1:22:42<08:15, 2.02it/s]
|
1222 |
91%|βββββββββ | 9679/10682 [1:22:42<08:14, 2.03it/s]
|
1223 |
91%|βββββββββ | 9680/10682 [1:22:43<08:14, 2.03it/s]
|
1224 |
91%|βββββββββ | 9681/10682 [1:22:43<08:14, 2.03it/s]
|
1225 |
91%|βββββββββ | 9682/10682 [1:22:44<08:13, 2.02it/s]
|
1226 |
91%|βββββββββ | 9683/10682 [1:22:44<08:13, 2.02it/s]
|
1227 |
91%|βββββββββ | 9684/10682 [1:22:45<08:13, 2.02it/s]
|
1228 |
91%|βββββββββ | 9685/10682 [1:22:45<08:12, 2.03it/s]
|
1229 |
91%|βββββββββ | 9686/10682 [1:22:46<08:11, 2.03it/s]
|
1230 |
91%|βββββββββ | 9687/10682 [1:22:46<08:10, 2.03it/s]
|
1231 |
91%|βββββββββ | 9688/10682 [1:22:47<08:10, 2.03it/s]
|
1232 |
91%|βββββββββ | 9689/10682 [1:22:47<08:10, 2.03it/s]
|
1233 |
91%|βββββββββ | 9690/10682 [1:22:48<08:08, 2.03it/s]
|
1234 |
91%|βββββββββ | 9691/10682 [1:22:48<08:08, 2.03it/s]
|
1235 |
91%|βββββββββ | 9692/10682 [1:22:49<08:08, 2.03it/s]
|
1236 |
91%|βββββββββ | 9693/10682 [1:22:49<08:07, 2.03it/s]
|
1237 |
91%|βββββββββ | 9694/10682 [1:22:50<08:08, 2.02it/s]
|
1238 |
91%|βββββββββ | 9695/10682 [1:22:50<08:07, 2.02it/s]
|
1239 |
91%|βββββββββ | 9696/10682 [1:22:51<08:06, 2.03it/s]
|
1240 |
91%|βββββββββ | 9697/10682 [1:22:51<08:06, 2.03it/s]
|
1241 |
91%|βββββββββ | 9698/10682 [1:22:52<08:04, 2.03it/s]
|
1242 |
91%|βββββββββ | 9699/10682 [1:22:52<08:04, 2.03it/s]
|
1243 |
91%|βββββββββ | 9700/10682 [1:22:53<08:04, 2.03it/s]
|
1244 |
{'loss': 2.6376, 'grad_norm': 0.266446590423584, 'learning_rate': 2.5527849119658387e-05, 'epoch': 12.7}
|
1245 |
+
|
1246 |
91%|βββββββββ | 9700/10682 [1:22:53<08:04, 2.03it/s]
|
1247 |
91%|βββββββββ | 9701/10682 [1:22:53<08:05, 2.02it/s]
|
1248 |
91%|βββββββββ | 9702/10682 [1:22:54<08:04, 2.02it/s]
|
1249 |
91%|βββββββββ | 9703/10682 [1:22:54<08:03, 2.02it/s]
|
1250 |
91%|βββββββββ | 9704/10682 [1:22:55<08:02, 2.03it/s]
|
1251 |
91%|βββββββββ | 9705/10682 [1:22:55<08:02, 2.02it/s]
|
1252 |
91%|βββββββββ | 9706/10682 [1:22:56<08:02, 2.02it/s]
|
1253 |
91%|βββββββββ | 9707/10682 [1:22:56<08:02, 2.02it/s]
|
1254 |
91%|βββββββββ | 9708/10682 [1:22:57<08:00, 2.03it/s]
|
1255 |
91%|βββββββββ | 9709/10682 [1:22:57<08:00, 2.02it/s]
|
1256 |
91%|βββββββββ | 9710/10682 [1:22:58<07:59, 2.03it/s]
|
1257 |
91%|βββββββββ | 9711/10682 [1:22:58<07:58, 2.03it/s]
|
1258 |
91%|βββββββββ | 9712/10682 [1:22:59<07:58, 2.03it/s]
|
1259 |
91%|βββββββββ | 9713/10682 [1:22:59<07:57, 2.03it/s]
|
1260 |
91%|βββββββββ | 9714/10682 [1:23:00<07:57, 2.03it/s]
|
1261 |
91%|βββββββββ | 9715/10682 [1:23:00<07:56, 2.03it/s]
|
1262 |
91%|βββββββββ | 9716/10682 [1:23:01<07:55, 2.03it/s]
|
1263 |
91%|βββββββββ | 9717/10682 [1:23:01<07:54, 2.03it/s]
|
1264 |
91%|βββββββββ | 9718/10682 [1:23:02<07:54, 2.03it/s]
|
1265 |
91%|βββββββββ | 9719/10682 [1:23:02<07:54, 2.03it/s]
|
1266 |
91%|βββββββββ | 9720/10682 [1:23:03<07:54, 2.03it/s]
|
1267 |
91%|βββββββββ | 9721/10682 [1:23:03<07:53, 2.03it/s]
|
1268 |
91%|βββββββββ | 9722/10682 [1:23:03<07:53, 2.03it/s]
|
1269 |
91%|βββββββββ | 9723/10682 [1:23:04<07:53, 2.03it/s]
|
1270 |
91%|βββββββββ | 9724/10682 [1:23:04<07:52, 2.03it/s]
|
1271 |
91%|βββββββββ | 9725/10682 [1:23:05<07:52, 2.03it/s]
|
1272 |
{'loss': 2.6387, 'grad_norm': 0.2688989043235779, 'learning_rate': 2.4255085494343522e-05, 'epoch': 12.74}
|
1273 |
+
|
1274 |
91%|βββββββββ | 9725/10682 [1:23:05<07:52, 2.03it/s]
|
1275 |
91%|βββββββββ | 9726/10682 [1:23:05<07:52, 2.02it/s]
|
1276 |
91%|βββββββββ | 9727/10682 [1:23:06<07:51, 2.03it/s]
|
1277 |
91%|βββββββββ | 9728/10682 [1:23:06<07:50, 2.03it/s]
|
1278 |
91%|βββββββββ | 9729/10682 [1:23:07<07:50, 2.03it/s]
|
1279 |
91%|βββββββββ | 9730/10682 [1:23:07<07:49, 2.03it/s]
|
1280 |
91%|βββββββββ | 9731/10682 [1:23:08<07:48, 2.03it/s]
|
1281 |
91%|βββββββββ | 9732/10682 [1:23:08<07:48, 2.03it/s]
|
1282 |
91%|βββββββββ | 9733/10682 [1:23:09<07:47, 2.03it/s]
|
1283 |
91%|βββββββββ | 9734/10682 [1:23:09<07:47, 2.03it/s]
|
1284 |
91%|βββββββββ | 9735/10682 [1:23:10<07:47, 2.03it/s]
|
1285 |
91%|βββββββββ | 9736/10682 [1:23:10<07:46, 2.03it/s]
|
1286 |
91%|βββββββββ | 9737/10682 [1:23:11<07:46, 2.02it/s]
|
1287 |
91%|βββββββββ | 9738/10682 [1:23:11<07:46, 2.02it/s]
|
1288 |
91%|βββββββββ | 9739/10682 [1:23:12<07:45, 2.02it/s]
|
1289 |
91%|βββββββββ | 9740/10682 [1:23:12<07:45, 2.03it/s]
|
1290 |
91%|βββββββββ | 9741/10682 [1:23:13<07:44, 2.02it/s]
|
1291 |
91%|βββββββββ | 9742/10682 [1:23:13<07:44, 2.03it/s]
|
1292 |
91%|βββββββββ | 9743/10682 [1:23:14<07:43, 2.02it/s]
|
1293 |
91%|βββββββββ | 9744/10682 [1:23:14<07:43, 2.03it/s]
|
1294 |
91%|βββββββββ | 9745/10682 [1:23:15<07:42, 2.02it/s]
|
1295 |
91%|βββββββββ | 9746/10682 [1:23:15<07:41, 2.03it/s]
|
1296 |
91%|βββββββββ | 9747/10682 [1:23:16<07:41, 2.03it/s]
|
1297 |
91%|ββββββββββ| 9748/10682 [1:23:16<07:40, 2.03it/s]
|
1298 |
91%|ββββββββββ| 9749/10682 [1:23:17<07:39, 2.03it/s]
|
1299 |
91%|ββββββββββ| 9750/10682 [1:23:17<07:39, 2.03it/s]{'loss': 2.6414, 'grad_norm': 0.269853413105011, 'learning_rate': 2.301407844110154e-05, 'epoch': 12.77}
|
1300 |
+
|
1301 |
|
1302 |
91%|ββββββββββ| 9750/10682 [1:23:17<07:39, 2.03it/s]
|
1303 |
91%|ββββββββββ| 9751/10682 [1:23:18<07:39, 2.02it/s]
|
1304 |
91%|ββββββββββ| 9752/10682 [1:23:18<07:39, 2.03it/s]
|
1305 |
91%|ββββββββββ| 9753/10682 [1:23:19<07:38, 2.02it/s]
|
1306 |
91%|ββββββββββ| 9754/10682 [1:23:19<07:38, 2.03it/s]
|
1307 |
91%|ββββββββββ| 9755/10682 [1:23:20<07:37, 2.03it/s]
|
1308 |
91%|ββββββββββ| 9756/10682 [1:23:20<07:36, 2.03it/s]
|
1309 |
91%|ββββββββββ| 9757/10682 [1:23:21<07:36, 2.03it/s]
|
1310 |
91%|ββββββββββ| 9758/10682 [1:23:21<07:35, 2.03it/s]
|
1311 |
91%|ββββββββββ| 9759/10682 [1:23:22<07:35, 2.03it/s]
|
1312 |
91%|ββββββββββ| 9760/10682 [1:23:22<07:34, 2.03it/s]
|
1313 |
91%|ββββββββββ| 9761/10682 [1:23:23<07:33, 2.03it/s]
|
1314 |
91%|ββββββββββ| 9762/10682 [1:23:23<07:34, 2.02it/s]
|
1315 |
91%|ββββββββββ| 9763/10682 [1:23:24<07:34, 2.02it/s]
|
1316 |
91%|ββββββββββ| 9764/10682 [1:23:24<07:33, 2.02it/s]
|
1317 |
91%|ββββββββββ| 9765/10682 [1:23:25<07:33, 2.02it/s]
|
1318 |
91%|ββββββββββ| 9766/10682 [1:23:25<07:32, 2.02it/s]
|
1319 |
91%|ββββββββββ| 9767/10682 [1:23:26<07:32, 2.02it/s]
|
1320 |
91%|ββββββββββ| 9768/10682 [1:23:26<07:31, 2.02it/s]
|
1321 |
91%|ββββββββββ| 9769/10682 [1:23:27<07:30, 2.03it/s]
|
1322 |
91%|ββββββββββ| 9770/10682 [1:23:27<07:29, 2.03it/s]
|
1323 |
91%|ββββββββββ| 9771/10682 [1:23:28<07:29, 2.03it/s]
|
1324 |
91%|ββββββββββ| 9772/10682 [1:23:28<07:28, 2.03it/s]
|
1325 |
91%|ββββββββββ| 9773/10682 [1:23:29<07:27, 2.03it/s]
|
1326 |
91%|ββββββββββ| 9774/10682 [1:23:29<07:27, 2.03it/s]
|
1327 |
92%|ββββββββββ| 9775/10682 [1:23:30<07:27, 2.03it/s]
|
1328 |
{'loss': 2.639, 'grad_norm': 0.2653633952140808, 'learning_rate': 2.1804910798715826e-05, 'epoch': 12.8}
|
1329 |
+
|
1330 |
92%|ββββββββββ| 9775/10682 [1:23:30<07:27, 2.03it/s]
|
1331 |
92%|ββββββββββ| 9776/10682 [1:23:30<07:27, 2.02it/s]
|
1332 |
92%|ββββββββββ| 9777/10682 [1:23:31<07:26, 2.03it/s]
|
1333 |
92%|ββββββββββ| 9778/10682 [1:23:31<07:26, 2.02it/s]
|
1334 |
92%|ββββββββββ| 9779/10682 [1:23:32<07:25, 2.02it/s]
|
1335 |
92%|ββββββββββ| 9780/10682 [1:23:32<07:25, 2.03it/s]
|
1336 |
92%|ββββββββββ| 9781/10682 [1:23:33<07:24, 2.03it/s]
|
1337 |
92%|ββββββββββ| 9782/10682 [1:23:33<07:24, 2.03it/s]
|
1338 |
92%|ββββββββββ| 9783/10682 [1:23:34<07:24, 2.02it/s]
|
1339 |
92%|ββββββββββ| 9784/10682 [1:23:34<07:23, 2.02it/s]
|
1340 |
92%|ββββββββββ| 9785/10682 [1:23:35<07:23, 2.02it/s]
|
1341 |
92%|ββββββββββ| 9786/10682 [1:23:35<07:22, 2.02it/s]
|
1342 |
92%|ββββββββββ| 9787/10682 [1:23:36<07:22, 2.02it/s]
|
1343 |
92%|ββββββββββ| 9788/10682 [1:23:36<07:21, 2.02it/s]
|
1344 |
92%|ββββββββββ| 9789/10682 [1:23:37<07:21, 2.02it/s]
|
1345 |
92%|ββββββββββ| 9790/10682 [1:23:37<07:20, 2.03it/s]
|
1346 |
92%|ββββββββββ| 9791/10682 [1:23:38<07:19, 2.03it/s]
|
1347 |
92%|ββββββββββ| 9792/10682 [1:23:38<07:19, 2.03it/s]
|
1348 |
92%|ββββββββββ| 9793/10682 [1:23:39<07:18, 2.03it/s]
|
1349 |
92%|ββββββββββ| 9794/10682 [1:23:39<07:18, 2.02it/s]
|
1350 |
92%|ββββββββββ| 9795/10682 [1:23:40<07:17, 2.03it/s]
|
1351 |
92%|ββββββββββ| 9796/10682 [1:23:40<07:18, 2.02it/s]
|
1352 |
92%|ββββββββββ| 9797/10682 [1:23:41<07:17, 2.02it/s]
|
1353 |
92%|ββββββββββ| 9798/10682 [1:23:41<07:17, 2.02it/s]
|
1354 |
92%|ββββββββββ| 9799/10682 [1:23:42<07:16, 2.02it/s]
|
1355 |
92%|ββββββββββ| 9800/10682 [1:23:42<07:15, 2.03it/s]{'loss': 2.6445, 'grad_norm': 0.2692641615867615, 'learning_rate': 2.0627663280649135e-05, 'epoch': 12.84}
|
1356 |
+
|
1357 |
|
1358 |
92%|ββββββββββ| 9800/10682 [1:23:42<07:15, 2.03it/s]
|
1359 |
92%|ββββββββββ| 9801/10682 [1:23:42<07:15, 2.02it/s]
|
1360 |
92%|ββββββββββ| 9802/10682 [1:23:43<07:14, 2.02it/s]
|
1361 |
92%|ββββββββββ| 9803/10682 [1:23:43<07:14, 2.03it/s]
|
1362 |
92%|ββββββββββ| 9804/10682 [1:23:44<07:13, 2.03it/s]
|
1363 |
92%|ββββββββββ| 9805/10682 [1:23:44<07:13, 2.02it/s]
|
1364 |
92%|ββββββββββ| 9806/10682 [1:23:45<07:12, 2.03it/s]
|
1365 |
92%|ββββββββββ| 9807/10682 [1:23:45<07:11, 2.03it/s]
|
1366 |
92%|ββββββββββ| 9808/10682 [1:23:46<07:11, 2.03it/s]
|
1367 |
92%|ββββββββββ| 9809/10682 [1:23:46<07:10, 2.03it/s]
|
1368 |
92%|ββββββββββ| 9810/10682 [1:23:47<07:09, 2.03it/s]
|
1369 |
92%|ββββββββββ| 9811/10682 [1:23:47<07:09, 2.03it/s]
|
1370 |
92%|ββββββββββ| 9812/10682 [1:23:48<07:09, 2.03it/s]
|
1371 |
92%|ββββββββββ| 9813/10682 [1:23:48<07:08, 2.03it/s]
|
1372 |
92%|ββββββββββ| 9814/10682 [1:23:49<07:08, 2.02it/s]
|
1373 |
92%|ββββββββββ| 9815/10682 [1:23:49<07:07, 2.03it/s]
|
1374 |
92%|ββββββββββ| 9816/10682 [1:23:50<07:07, 2.03it/s]
|
1375 |
92%|ββββββββββ| 9817/10682 [1:23:50<07:06, 2.03it/s]
|
1376 |
92%|ββββββββββ| 9818/10682 [1:23:51<07:05, 2.03it/s]
|
1377 |
92%|ββββββββββ| 9819/10682 [1:23:51<07:05, 2.03it/s]
|
1378 |
92%|ββββββββββ| 9820/10682 [1:23:52<07:04, 2.03it/s]
|
1379 |
92%|ββββββββββ| 9821/10682 [1:23:52<07:04, 2.03it/s]
|
1380 |
92%|ββββββββββ| 9822/10682 [1:23:53<07:04, 2.03it/s]
|
1381 |
92%|ββββββββββ| 9823/10682 [1:23:53<07:03, 2.03it/s]
|
1382 |
92%|ββββββββββ| 9824/10682 [1:23:54<07:03, 2.03it/s]
|
1383 |
92%|ββββββββββ| 9825/10682 [1:23:54<07:02, 2.03it/s]{'loss': 2.6394, 'grad_norm': 0.26735222339630127, 'learning_rate': 1.9482414469655486e-05, 'epoch': 12.87}
|
1384 |
+
|
1385 |
|
1386 |
92%|ββββββββββ| 9825/10682 [1:23:54<07:02, 2.03it/s]
|
1387 |
92%|ββββββββββ| 9826/10682 [1:23:55<07:03, 2.02it/s]
|
1388 |
92%|ββββββββββ| 9827/10682 [1:23:55<07:02, 2.02it/s]
|
1389 |
92%|ββββββββββ| 9828/10682 [1:23:56<07:02, 2.02it/s]
|
1390 |
92%|ββββββββββ| 9829/10682 [1:23:56<07:01, 2.02it/s]
|
1391 |
92%|ββββββββββ| 9830/10682 [1:23:57<07:00, 2.03it/s]
|
1392 |
92%|ββββββββββ| 9831/10682 [1:23:57<07:00, 2.02it/s]
|
1393 |
92%|ββββββββββ| 9832/10682 [1:23:58<06:59, 2.03it/s]
|
1394 |
92%|ββββββββββ| 9833/10682 [1:23:58<06:58, 2.03it/s]
|
1395 |
92%|ββββββββββ| 9834/10682 [1:23:59<06:58, 2.03it/s]
|
1396 |
92%|ββββββββββ| 9835/10682 [1:23:59<06:57, 2.03it/s]
|
1397 |
92%|ββββββββββ| 9836/10682 [1:24:00<06:57, 2.03it/s]
|
1398 |
92%|ββββββββββ| 9837/10682 [1:24:00<06:56, 2.03it/s]
|
1399 |
92%|ββββββββββ| 9838/10682 [1:24:01<06:56, 2.03it/s]
|
1400 |
92%|ββββββββββ| 9839/10682 [1:24:01<06:55, 2.03it/s]
|
1401 |
92%|ββββββββββ| 9840/10682 [1:24:02<06:55, 2.03it/s]
|
1402 |
92%|ββββββββββ| 9841/10682 [1:24:02<06:54, 2.03it/s]
|
1403 |
92%|ββββββββββ| 9842/10682 [1:24:03<06:54, 2.03it/s]
|
1404 |
92%|ββββββββββ| 9843/10682 [1:24:03<06:53, 2.03it/s]
|
1405 |
92%|ββββββββββ| 9844/10682 [1:24:04<06:53, 2.03it/s]
|
1406 |
92%|ββββββββββ| 9845/10682 [1:24:04<06:52, 2.03it/s]
|
1407 |
92%|ββββββββββ| 9846/10682 [1:24:05<06:51, 2.03it/s]
|
1408 |
92%|ββββββββββ| 9847/10682 [1:24:05<06:51, 2.03it/s]
|
1409 |
92%|ββββββββββ| 9848/10682 [1:24:06<06:51, 2.03it/s]
|
1410 |
92%|ββββββββββ| 9849/10682 [1:24:06<06:51, 2.03it/s]
|
1411 |
92%|ββββββββββ| 9850/10682 [1:24:07<06:50, 2.03it/s]
|
1412 |
|
1413 |
+
|
1414 |
92%|ββββββββββ| 9850/10682 [1:24:07<06:50, 2.03it/s]
|
1415 |
92%|ββββββββββ| 9851/10682 [1:24:07<06:50, 2.02it/s]
|
1416 |
92%|ββββββββββ| 9852/10682 [1:24:08<06:49, 2.03it/s]
|
1417 |
92%|ββββββββββ| 9853/10682 [1:24:08<06:49, 2.02it/s]
|
1418 |
92%|ββββββββββ| 9854/10682 [1:24:09<06:48, 2.03it/s]
|
1419 |
92%|ββββββββββ| 9855/10682 [1:24:09<06:48, 2.03it/s]
|
1420 |
92%|ββββββββββ| 9856/10682 [1:24:10<06:47, 2.03it/s]
|
1421 |
92%|ββββββββββ| 9857/10682 [1:24:10<06:47, 2.03it/s]
|
1422 |
92%|ββββββββββ| 9858/10682 [1:24:11<06:46, 2.03it/s]
|
1423 |
92%|ββββββββββ| 9859/10682 [1:24:11<06:45, 2.03it/s]
|
1424 |
92%|ββββββββββ| 9860/10682 [1:24:12<06:46, 2.02it/s]
|
1425 |
92%|ββββββββββ| 9861/10682 [1:24:12<06:45, 2.03it/s]
|
1426 |
92%|ββββββββββ| 9862/10682 [1:24:13<06:44, 2.03it/s]
|
1427 |
92%|ββββββββββ| 9863/10682 [1:24:13<06:43, 2.03it/s]
|
1428 |
92%|ββββββββββ| 9864/10682 [1:24:14<06:43, 2.03it/s]
|
1429 |
92%|ββββββββββ| 9865/10682 [1:24:14<06:42, 2.03it/s]
|
1430 |
92%|ββββββββββ| 9866/10682 [1:24:15<06:42, 2.03it/s]
|
1431 |
92%|ββββββββββ| 9867/10682 [1:24:15<06:41, 2.03it/s]
|
1432 |
92%|ββββββββββ| 9868/10682 [1:24:16<06:41, 2.03it/s]
|
1433 |
92%|ββββββββββ| 9869/10682 [1:24:16<06:40, 2.03it/s]
|
1434 |
92%|ββββββββββ| 9870/10682 [1:24:17<06:40, 2.03it/s]
|
1435 |
92%|ββββββββββ| 9871/10682 [1:24:17<06:39, 2.03it/s]
|
1436 |
92%|ββββββββββ| 9872/10682 [1:24:18<06:39, 2.03it/s]
|
1437 |
92%|ββββββββββ| 9873/10682 [1:24:18<06:38, 2.03it/s]
|
1438 |
92%|ββββββββββ| 9874/10682 [1:24:19<06:38, 2.03it/s]
|
1439 |
92%|ββββββββββ| 9875/10682 [1:24:19<06:37, 2.03it/s]
|
1440 |
{'loss': 2.6349, 'grad_norm': 0.27050691843032837, 'learning_rate': 1.7288216615031272e-05, 'epoch': 12.93}
|
1441 |
+
|
1442 |
92%|ββββββββββ| 9875/10682 [1:24:19<06:37, 2.03it/s]
|
1443 |
92%|ββββββββββ| 9876/10682 [1:24:19<06:37, 2.03it/s]
|
1444 |
92%|ββββββββββ| 9877/10682 [1:24:20<06:37, 2.03it/s]
|
1445 |
92%|ββββββββββ| 9878/10682 [1:24:20<06:36, 2.03it/s]
|
1446 |
92%|ββββββββββ| 9879/10682 [1:24:21<06:35, 2.03it/s]
|
1447 |
92%|ββββββββββ| 9880/10682 [1:24:21<06:35, 2.03it/s]
|
1448 |
93%|ββββββββββ| 9881/10682 [1:24:22<06:34, 2.03it/s]
|
1449 |
93%|ββββββββββ| 9882/10682 [1:24:22<06:34, 2.03it/s]
|
1450 |
93%|ββββββββββ| 9883/10682 [1:24:23<06:33, 2.03it/s]
|
1451 |
93%|ββββββββββ| 9884/10682 [1:24:23<06:34, 2.03it/s]
|
1452 |
93%|ββββββββββ| 9885/10682 [1:24:24<06:33, 2.03it/s]
|
1453 |
93%|ββββββββββ| 9886/10682 [1:24:24<06:32, 2.03it/s]
|
1454 |
93%|ββββββββββ| 9887/10682 [1:24:25<06:32, 2.03it/s]
|
1455 |
93%|ββββββββββ| 9888/10682 [1:24:25<06:30, 2.03it/s]
|
1456 |
93%|ββββββββββ| 9889/10682 [1:24:26<06:31, 2.03it/s]
|
1457 |
93%|ββββββββββ| 9890/10682 [1:24:26<06:30, 2.03it/s]
|
1458 |
93%|ββββββββββ| 9891/10682 [1:24:27<06:29, 2.03it/s]
|
1459 |
93%|ββββββββββ| 9892/10682 [1:24:27<06:29, 2.03it/s]
|
1460 |
93%|ββββββββββ| 9893/10682 [1:24:28<06:28, 2.03it/s]
|
1461 |
93%|ββββββββββ| 9894/10682 [1:24:28<06:28, 2.03it/s]
|
1462 |
93%|ββββββββββ| 9895/10682 [1:24:29<06:27, 2.03it/s]
|
1463 |
93%|ββββββββββ| 9896/10682 [1:24:29<06:27, 2.03it/s]
|
1464 |
93%|ββββββββββ| 9897/10682 [1:24:30<06:26, 2.03it/s]
|
1465 |
93%|ββββββββββ| 9898/10682 [1:24:30<06:26, 2.03it/s]
|
1466 |
93%|ββββββββββ| 9899/10682 [1:24:31<06:25, 2.03it/s]
|
1467 |
93%|ββββββββββ| 9900/10682 [1:24:31<06:25, 2.03it/s]{'loss': 2.6377, 'grad_norm': 0.2702299952507019, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
1468 |
+
|
1469 |
|
1470 |
93%|ββββββββββ| 9900/10682 [1:24:31<06:25, 2.03it/s]
|
1471 |
93%|ββββββββββ| 9901/10682 [1:24:32<06:25, 2.03it/s]
|
1472 |
93%|ββββββββββ| 9902/10682 [1:24:32<06:24, 2.03it/s]
|
1473 |
93%|ββββββββββ| 9903/10682 [1:24:33<06:24, 2.03it/s]
|
1474 |
93%|ββββββββββ| 9904/10682 [1:24:33<06:23, 2.03it/s]
|
1475 |
93%|ββββββββββ| 9905/10682 [1:24:34<06:23, 2.03it/s]
|
1476 |
93%|ββββββββββ| 9906/10682 [1:24:34<06:22, 2.03it/s]
|
1477 |
93%|ββββββββββ| 9907/10682 [1:24:35<06:22, 2.03it/s]
|
1478 |
93%|ββββββββββ| 9908/10682 [1:24:35<06:22, 2.02it/s]
|
1479 |
93%|ββββββββββ| 9909/10682 [1:24:36<06:21, 2.03it/s]
|
1480 |
93%|ββββββββββ| 9910/10682 [1:24:36<06:21, 2.02it/s]
|
1481 |
93%|ββββββββββ| 9911/10682 [1:24:37<06:20, 2.03it/s]
|
1482 |
93%|ββββββββββ| 9912/10682 [1:24:37<06:19, 2.03it/s]
|
1483 |
93%|ββββββββββ| 9913/10682 [1:24:38<06:19, 2.03it/s]
|
1484 |
93%|ββββββββββ| 9914/10682 [1:24:38<06:18, 2.03it/s]
|
1485 |
93%|ββββββββββ| 9915/10682 [1:24:39<06:18, 2.03it/s]
|
1486 |
93%|ββββββββββ| 9916/10682 [1:24:39<06:17, 2.03it/s]
|
1487 |
93%|ββββββββββ| 9917/10682 [1:24:40<06:17, 2.03it/s]
|
1488 |
93%|ββββββββββ| 9918/10682 [1:24:40<06:16, 2.03it/s]
|
1489 |
93%|ββββββββββ| 9919/10682 [1:24:41<06:16, 2.02it/s]
|
1490 |
93%|ββββββββββ| 9920/10682 [1:24:41<06:16, 2.03it/s]
|
1491 |
93%|ββββββββββ| 9921/10682 [1:24:42<06:15, 2.03it/s]
|
1492 |
93%|ββββββββββ| 9922/10682 [1:24:42<06:15, 2.03it/s]
|
1493 |
93%|ββββββββββ| 9923/10682 [1:24:43<06:14, 2.02it/s]
|
1494 |
93%|ββββββββββ| 9924/10682 [1:24:43<06:14, 2.02it/s]
|
1495 |
93%|ββββββββββ| 9925/10682 [1:24:44<06:22, 1.98it/s]{'loss': 2.6435, 'grad_norm': 0.2638016641139984, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.0}
|
1496 |
+
|
1497 |
|
1498 |
93%|ββββββββββ| 9925/10682 [1:24:44<06:22, 1.98it/s]
|
1499 |
93%|ββββββββββ| 9926/10682 [1:25:02<1:14:46, 5.93s/it]
|
1500 |
93%|ββββββββββ| 9927/10682 [1:25:03<54:08, 4.30s/it]
|
1501 |
93%|ββββββββββ| 9928/10682 [1:25:03<39:42, 3.16s/it]
|
1502 |
93%|ββββββββββ| 9929/10682 [1:25:04<29:39, 2.36s/it]
|
1503 |
93%|ββββββββββ| 9930/10682 [1:25:04<22:35, 1.80s/it]
|
1504 |
93%|ββββββββββ| 9931/10682 [1:25:05<17:38, 1.41s/it]
|
1505 |
93%|ββββββββββ| 9932/10682 [1:25:05<14:10, 1.13s/it]
|
1506 |
93%|ββββββββββ| 9933/10682 [1:25:06<11:45, 1.06it/s]
|
1507 |
93%|ββββββββββ| 9934/10682 [1:25:06<10:03, 1.24it/s]
|
1508 |
93%|ββββββββββ| 9935/10682 [1:25:07<08:55, 1.40it/s]
|
1509 |
93%|ββββββββββ| 9936/10682 [1:25:07<08:04, 1.54it/s]
|
1510 |
93%|ββββββββββ| 9937/10682 [1:25:08<07:29, 1.66it/s]
|
1511 |
93%|ββββββββββ| 9938/10682 [1:25:08<07:04, 1.75it/s]
|
1512 |
93%|ββββββββββ| 9939/10682 [1:25:09<06:47, 1.82it/s]
|
1513 |
93%|ββββββββββ| 9940/10682 [1:25:09<06:36, 1.87it/s]
|
1514 |
93%|ββββββββββ| 9941/10682 [1:25:10<06:26, 1.92it/s]
|
1515 |
93%|ββββββββββ| 9942/10682 [1:25:10<06:19, 1.95it/s]
|
1516 |
93%|ββββββββββ| 9943/10682 [1:25:11<06:14, 1.97it/s]
|
1517 |
93%|ββββββββββ| 9944/10682 [1:25:11<06:11, 1.99it/s]
|
1518 |
93%|ββββββββββ| 9945/10682 [1:25:12<06:09, 2.00it/s]
|
1519 |
93%|ββββββββββ| 9946/10682 [1:25:12<06:07, 2.00it/s]
|
1520 |
93%|ββββββββββ| 9947/10682 [1:25:13<06:05, 2.01it/s]
|
1521 |
93%|ββββββββββ| 9948/10682 [1:25:13<06:04, 2.01it/s]
|
1522 |
93%|ββββββββββ| 9949/10682 [1:25:14<06:04, 2.01it/s]
|
1523 |
93%|ββββββββββ| 9950/10682 [1:25:14<06:02, 2.02it/s]{'loss': 2.5985, 'grad_norm': 0.2674480974674225, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.03}
|
1524 |
+
|
1525 |
|
1526 |
93%|ββββββββββ| 9950/10682 [1:25:14<06:02, 2.02it/s]
|
1527 |
93%|ββββββββββ| 9951/10682 [1:25:15<06:02, 2.02it/s]
|