Training in progress, epoch 12
Browse files- logs/events.out.tfevents.1715485378.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +32 -1
logs/events.out.tfevents.1715485378.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f6eda382822333d3e0fb58eda7d6c08443a03c9086314112a65e1983ec9de9d
|
3 |
+
size 81841
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93f05a8c0ba3bedaf26af6077768cd7c73e13a720bc5c026755d8230719bee8d
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -557,4 +557,35 @@ command outputs:
|
|
557 |
|
558 |
78%|ββββββββ | 8325/10682 [1:10:33<19:19, 2.03it/s]
|
559 |
78%|ββββββββ | 8326/10682 [1:10:33<19:21, 2.03it/s]
|
560 |
78%|ββββββββ | 8327/10682 [1:10:34<19:21, 2.03it/s]
|
561 |
78%|ββββββββ | 8328/10682 [1:10:34<19:20, 2.03it/s]
|
562 |
78%|ββββββββ | 8329/10682 [1:10:35<19:18, 2.03it/s]
|
563 |
78%|ββββββββ | 8330/10682 [1:10:35<19:17, 2.03it/s]
|
564 |
78%|ββββββββ | 8331/10682 [1:10:36<19:18, 2.03it/s]
|
565 |
78%|ββββββββ | 8332/10682 [1:10:36<19:16, 2.03it/s]
|
566 |
78%|ββββββββ | 8333/10682 [1:10:37<19:16, 2.03it/s]
|
567 |
78%|ββββββββ | 8334/10682 [1:10:37<19:16, 2.03it/s]
|
568 |
78%|ββββββββ | 8335/10682 [1:10:38<19:15, 2.03it/s]
|
569 |
78%|ββββββββ | 8336/10682 [1:10:38<19:15, 2.03it/s]
|
570 |
78%|ββββββββ | 8337/10682 [1:10:39<19:13, 2.03it/s]
|
571 |
78%|ββββββββ | 8338/10682 [1:10:39<19:13, 2.03it/s]
|
572 |
78%|ββββββββ | 8339/10682 [1:10:40<19:13, 2.03it/s]
|
573 |
78%|ββββββββ | 8340/10682 [1:10:40<19:12, 2.03it/s]
|
574 |
78%|ββββββββ | 8341/10682 [1:10:41<19:11, 2.03it/s]
|
575 |
78%|ββββββββ | 8342/10682 [1:10:41<19:12, 2.03it/s]
|
576 |
78%|ββββββββ | 8343/10682 [1:10:42<19:13, 2.03it/s]
|
577 |
78%|ββββββββ | 8344/10682 [1:10:42<19:12, 2.03it/s]
|
578 |
78%|ββββββββ | 8345/10682 [1:10:43<19:10, 2.03it/s]
|
579 |
78%|ββββββββ | 8346/10682 [1:10:43<19:09, 2.03it/s]
|
580 |
78%|ββββββββ | 8347/10682 [1:10:44<19:10, 2.03it/s]
|
581 |
78%|ββββββββ | 8348/10682 [1:10:44<19:09, 2.03it/s]
|
582 |
78%|ββββββββ | 8349/10682 [1:10:45<19:07, 2.03it/s]
|
583 |
78%|ββββββββ | 8350/10682 [1:10:45<19:07, 2.03it/s]{'loss': 2.9119, 'grad_norm': 0.256185919046402, 'learning_rate': 0.00013831071326327282, 'epoch': 10.94}
|
584 |
|
585 |
|
586 |
78%|ββββββββ | 8350/10682 [1:10:45<19:07, 2.03it/s]
|
587 |
78%|ββββββββ | 8351/10682 [1:10:46<19:08, 2.03it/s]
|
588 |
78%|ββββββββ | 8352/10682 [1:10:46<19:09, 2.03it/s]
|
589 |
78%|ββββββββ | 8353/10682 [1:10:47<19:07, 2.03it/s]
|
590 |
78%|ββββββββ | 8354/10682 [1:10:47<19:08, 2.03it/s]
|
591 |
78%|ββββββββ | 8355/10682 [1:10:48<19:07, 2.03it/s]
|
592 |
78%|ββββββββ | 8356/10682 [1:10:48<19:06, 2.03it/s]
|
593 |
78%|ββββββββ | 8357/10682 [1:10:49<19:06, 2.03it/s]
|
594 |
78%|ββββββββ | 8358/10682 [1:10:49<19:04, 2.03it/s]
|
595 |
78%|ββββββββ | 8359/10682 [1:10:50<19:04, 2.03it/s]
|
596 |
78%|ββββββββ | 8360/10682 [1:10:50<19:03, 2.03it/s]
|
597 |
78%|ββββββββ | 8361/10682 [1:10:51<19:02, 2.03it/s]
|
598 |
78%|ββββββββ | 8362/10682 [1:10:51<19:03, 2.03it/s]
|
599 |
78%|ββββββββ | 8363/10682 [1:10:52<19:01, 2.03it/s]
|
600 |
78%|ββββββββ | 8364/10682 [1:10:52<19:00, 2.03it/s]
|
601 |
78%|ββββββββ | 8365/10682 [1:10:53<19:00, 2.03it/s]
|
602 |
78%|ββββββββ | 8366/10682 [1:10:53<18:59, 2.03it/s]
|
603 |
78%|ββββββββ | 8367/10682 [1:10:54<18:58, 2.03it/s]
|
604 |
78%|ββββββββ | 8368/10682 [1:10:54<18:58, 2.03it/s]
|
605 |
78%|ββββββββ | 8369/10682 [1:10:55<18:57, 2.03it/s]
|
606 |
78%|ββββββββ | 8370/10682 [1:10:55<18:59, 2.03it/s]
|
607 |
78%|ββββββββ | 8371/10682 [1:10:56<18:57, 2.03it/s]
|
608 |
78%|ββββββββ | 8372/10682 [1:10:56<18:58, 2.03it/s]
|
609 |
78%|ββββββββ | 8373/10682 [1:10:57<18:57, 2.03it/s]
|
610 |
78%|ββββββββ | 8374/10682 [1:10:57<18:56, 2.03it/s]
|
611 |
78%|ββββββββ | 8375/10682 [1:10:58<18:57, 2.03it/s]{'loss': 2.9219, 'grad_norm': 0.2577957808971405, 'learning_rate': 0.0001355022636953933, 'epoch': 10.98}
|
612 |
|
613 |
|
614 |
78%|ββββββββ | 8375/10682 [1:10:58<18:57, 2.03it/s]
|
615 |
78%|ββββββββ | 8376/10682 [1:10:58<18:57, 2.03it/s]
|
616 |
78%|ββββββββ | 8377/10682 [1:10:59<18:58, 2.03it/s]
|
617 |
78%|ββββββββ | 8378/10682 [1:10:59<18:57, 2.03it/s]
|
618 |
78%|ββββββββ | 8379/10682 [1:11:00<18:56, 2.03it/s]
|
619 |
78%|ββββββββ | 8380/10682 [1:11:00<18:55, 2.03it/s]
|
620 |
78%|ββββββββ | 8381/10682 [1:11:01<18:54, 2.03it/s]
|
621 |
78%|ββββββββ | 8382/10682 [1:11:01<18:54, 2.03it/s]
|
622 |
78%|ββββββββ | 8383/10682 [1:11:02<18:53, 2.03it/s]
|
623 |
78%|ββββββββ | 8384/10682 [1:11:02<18:52, 2.03it/s]
|
624 |
78%|ββββββββ | 8385/10682 [1:11:03<18:51, 2.03it/s]
|
625 |
79%|ββββββββ | 8386/10682 [1:11:03<18:51, 2.03it/s]
|
626 |
79%|ββββββββ | 8387/10682 [1:11:04<18:51, 2.03it/s]
|
627 |
79%|ββββββββ | 8388/10682 [1:11:04<18:49, 2.03it/s]
|
628 |
79%|ββββββββ | 8389/10682 [1:11:05<18:48, 2.03it/s]
|
629 |
79%|ββββββββ | 8390/10682 [1:11:05<18:48, 2.03it/s]
|
630 |
79%|ββββββββ | 8391/10682 [1:11:06<18:47, 2.03it/s]
|
631 |
79%|ββββββββ | 8392/10682 [1:11:06<18:49, 2.03it/s]
|
632 |
79%|ββββββββ | 8393/10682 [1:11:06<18:36, 2.05it/s]
|
633 |
79%|ββββββββ | 8394/10682 [1:11:21<3:01:51, 4.77s/it]
|
634 |
79%|ββββββββ | 8395/10682 [1:11:22<2:12:51, 3.49s/it]
|
635 |
79%|ββββββββ | 8396/10682 [1:11:22<1:38:43, 2.59s/it]
|
636 |
79%|ββββββββ | 8397/10682 [1:11:23<1:14:42, 1.96s/it]
|
637 |
79%|ββββββββ | 8398/10682 [1:11:23<57:59, 1.52s/it]
|
638 |
79%|ββββββββ | 8399/10682 [1:11:24<46:11, 1.21s/it]
|
639 |
79%|ββββββββ | 8400/10682 [1:11:24<37:57, 1.00it/s]
|
640 |
|
641 |
-
|
642 |
79%|ββββββββ | 8400/10682 [1:11:24<37:57, 1.00it/s]
|
643 |
79%|ββββββββ | 8401/10682 [1:11:25<32:22, 1.17it/s]
|
644 |
79%|ββββββββ | 8402/10682 [1:11:25<28:16, 1.34it/s]
|
645 |
79%|ββββββββ | 8403/10682 [1:11:26<25:23, 1.50it/s]
|
646 |
79%|ββββββββ | 8404/10682 [1:11:26<23:24, 1.62it/s]
|
647 |
79%|ββββββββ | 8405/10682 [1:11:27<21:59, 1.73it/s]
|
648 |
79%|ββββββββ | 8406/10682 [1:11:27<20:58, 1.81it/s]
|
649 |
79%|ββββββββ | 8407/10682 [1:11:28<20:16, 1.87it/s]
|
650 |
79%|ββββββββ | 8408/10682 [1:11:28<19:46, 1.92it/s]
|
651 |
79%|ββββββββ | 8409/10682 [1:11:29<19:26, 1.95it/s]
|
652 |
79%|ββββββββ | 8410/10682 [1:11:29<19:11, 1.97it/s]
|
653 |
79%|ββββββββ | 8411/10682 [1:11:30<19:04, 1.98it/s]
|
654 |
79%|ββββββββ | 8412/10682 [1:11:30<18:54, 2.00it/s]
|
655 |
79%|ββββββββ | 8413/10682 [1:11:31<18:48, 2.01it/s]
|
656 |
79%|ββββββββ | 8414/10682 [1:11:31<18:46, 2.01it/s]
|
657 |
79%|ββββββββ | 8415/10682 [1:11:32<18:43, 2.02it/s]
|
658 |
79%|ββββββββ | 8416/10682 [1:11:32<18:41, 2.02it/s]
|
659 |
79%|ββββββββ | 8417/10682 [1:11:33<18:38, 2.02it/s]
|
660 |
79%|ββββββββ | 8418/10682 [1:11:33<18:38, 2.02it/s]
|
|
|
661 |
79%|ββββββββ | 8400/10682 [1:11:24<37:57, 1.00it/s]
|
662 |
79%|ββββββββ | 8401/10682 [1:11:25<32:22, 1.17it/s]
|
663 |
79%|ββββββββ | 8402/10682 [1:11:25<28:16, 1.34it/s]
|
664 |
79%|ββββββββ | 8403/10682 [1:11:26<25:23, 1.50it/s]
|
665 |
79%|ββββββββ | 8404/10682 [1:11:26<23:24, 1.62it/s]
|
666 |
79%|ββββββββ | 8405/10682 [1:11:27<21:59, 1.73it/s]
|
667 |
79%|ββββββββ | 8406/10682 [1:11:27<20:58, 1.81it/s]
|
668 |
79%|ββββββββ | 8407/10682 [1:11:28<20:16, 1.87it/s]
|
669 |
79%|ββββββββ | 8408/10682 [1:11:28<19:46, 1.92it/s]
|
670 |
79%|ββββββββ | 8409/10682 [1:11:29<19:26, 1.95it/s]
|
671 |
79%|ββββββββ | 8410/10682 [1:11:29<19:11, 1.97it/s]
|
672 |
79%|ββββββββ | 8411/10682 [1:11:30<19:04, 1.98it/s]
|
673 |
79%|ββββββββ | 8412/10682 [1:11:30<18:54, 2.00it/s]
|
674 |
79%|ββββββββ | 8413/10682 [1:11:31<18:48, 2.01it/s]
|
675 |
79%|ββββββββ | 8414/10682 [1:11:31<18:46, 2.01it/s]
|
676 |
79%|ββββββββ | 8415/10682 [1:11:32<18:43, 2.02it/s]
|
677 |
79%|ββββββββ | 8416/10682 [1:11:32<18:41, 2.02it/s]
|
678 |
79%|ββββββββ | 8417/10682 [1:11:33<18:38, 2.02it/s]
|
679 |
79%|ββββββββ | 8418/10682 [1:11:33<18:38, 2.02it/s]
|
680 |
79%|ββββββββ | 8419/10682 [1:11:34<18:36, 2.03it/s]
|
681 |
79%|ββββββββ | 8420/10682 [1:11:34<18:36, 2.03it/s]
|
682 |
79%|ββββββββ | 8421/10682 [1:11:35<18:35, 2.03it/s]
|
683 |
79%|ββββββββ | 8422/10682 [1:11:35<18:33, 2.03it/s]
|
684 |
79%|ββββββββ | 8423/10682 [1:11:36<18:33, 2.03it/s]
|
685 |
79%|ββββββββ | 8424/10682 [1:11:36<18:32, 2.03it/s]
|
686 |
79%|ββββββββ | 8425/10682 [1:11:37<18:32, 2.03it/s]
|
687 |
|
|
|
688 |
79%|ββββββββ | 8425/10682 [1:11:37<18:32, 2.03it/s]
|
689 |
79%|ββββββββ | 8426/10682 [1:11:37<18:32, 2.03it/s]
|
690 |
79%|ββββββββ | 8427/10682 [1:11:38<18:30, 2.03it/s]
|
691 |
79%|ββββββββ | 8428/10682 [1:11:38<18:30, 2.03it/s]
|
692 |
79%|ββββββββ | 8429/10682 [1:11:39<18:29, 2.03it/s]
|
693 |
79%|ββββββββ | 8430/10682 [1:11:39<18:30, 2.03it/s]
|
694 |
79%|ββββββββ | 8431/10682 [1:11:40<18:29, 2.03it/s]
|
695 |
79%|ββββββββ | 8432/10682 [1:11:40<18:29, 2.03it/s]
|
696 |
79%|ββββββββ | 8433/10682 [1:11:40<18:28, 2.03it/s]
|
697 |
79%|ββββββββ | 8434/10682 [1:11:41<18:27, 2.03it/s]
|
698 |
79%|ββββββββ | 8435/10682 [1:11:41<18:28, 2.03it/s]
|
699 |
79%|ββββββββ | 8436/10682 [1:11:42<18:27, 2.03it/s]
|
700 |
79%|ββββββββ | 8437/10682 [1:11:42<18:27, 2.03it/s]
|
701 |
79%|ββββββββ | 8438/10682 [1:11:43<18:26, 2.03it/s]
|
702 |
79%|ββββββββ | 8439/10682 [1:11:43<18:23, 2.03it/s]
|
703 |
79%|ββββββββ | 8440/10682 [1:11:44<18:24, 2.03it/s]
|
704 |
79%|ββββββββ | 8441/10682 [1:11:44<18:23, 2.03it/s]
|
705 |
79%|ββββββββ | 8442/10682 [1:11:45<18:21, 2.03it/s]
|
706 |
79%|ββββββββ | 8443/10682 [1:11:45<18:22, 2.03it/s]
|
707 |
79%|ββββββββ | 8444/10682 [1:11:46<18:20, 2.03it/s]
|
708 |
79%|ββββββββ | 8445/10682 [1:11:46<18:20, 2.03it/s]
|
709 |
79%|ββββββββ | 8446/10682 [1:11:47<18:20, 2.03it/s]
|
710 |
79%|ββββββββ | 8447/10682 [1:11:47<18:19, 2.03it/s]
|
711 |
79%|ββββββββ | 8448/10682 [1:11:48<18:21, 2.03it/s]
|
712 |
79%|ββββββββ | 8449/10682 [1:11:48<18:21, 2.03it/s]
|
713 |
79%|ββββββββ | 8450/10682 [1:11:49<18:20, 2.03it/s]
|
714 |
|
|
|
715 |
79%|ββββββββ | 8450/10682 [1:11:49<18:20, 2.03it/s]
|
716 |
79%|ββββββββ | 8451/10682 [1:11:49<18:21, 2.03it/s]
|
717 |
79%|ββββββββ | 8452/10682 [1:11:50<18:20, 2.03it/s]
|
718 |
79%|ββββββββ | 8453/10682 [1:11:50<18:19, 2.03it/s]
|
719 |
79%|ββββββββ | 8454/10682 [1:11:51<18:18, 2.03it/s]
|
720 |
79%|ββββββββ | 8455/10682 [1:11:51<18:17, 2.03it/s]
|
721 |
79%|ββββββββ | 8456/10682 [1:11:52<18:17, 2.03it/s]
|
722 |
79%|ββββββββ | 8457/10682 [1:11:52<18:15, 2.03it/s]
|
723 |
79%|ββββββββ | 8458/10682 [1:11:53<18:15, 2.03it/s]
|
724 |
79%|ββββββββ | 8459/10682 [1:11:53<18:15, 2.03it/s]
|
725 |
79%|ββββββββ | 8460/10682 [1:11:54<18:15, 2.03it/s]
|
726 |
79%|ββββββββ | 8461/10682 [1:11:54<18:14, 2.03it/s]
|
727 |
79%|ββββββββ | 8462/10682 [1:11:55<18:13, 2.03it/s]
|
728 |
79%|ββββββββ | 8463/10682 [1:11:55<18:13, 2.03it/s]
|
729 |
79%|ββββββββ | 8464/10682 [1:11:56<18:12, 2.03it/s]
|
730 |
79%|ββββββββ | 8465/10682 [1:11:56<18:11, 2.03it/s]
|
731 |
79%|ββββββββ | 8466/10682 [1:11:57<18:12, 2.03it/s]
|
732 |
79%|ββββββββ | 8467/10682 [1:11:57<18:11, 2.03it/s]
|
733 |
79%|ββββββββ | 8468/10682 [1:11:58<18:10, 2.03it/s]
|
734 |
79%|ββββββββ | 8469/10682 [1:11:58<18:10, 2.03it/s]
|
735 |
79%|ββββββββ | 8470/10682 [1:11:59<18:08, 2.03it/s]
|
736 |
79%|ββββββββ | 8471/10682 [1:11:59<18:08, 2.03it/s]
|
737 |
79%|ββββββββ | 8472/10682 [1:12:00<18:06, 2.03it/s]
|
738 |
79%|ββββββββ | 8473/10682 [1:12:00<18:06, 2.03it/s]
|
739 |
79%|ββββββββ | 8474/10682 [1:12:01<18:06, 2.03it/s]
|
740 |
79%|ββββββββ | 8475/10682 [1:12:01<18:05, 2.03it/s]
|
741 |
{'loss': 2.819, 'grad_norm': 0.2613165080547333, 'learning_rate': 0.00012451362249196797, 'epoch': 11.11}
|
|
|
742 |
79%|ββββββββ | 8475/10682 [1:12:01<18:05, 2.03it/s]
|
743 |
79%|ββββββββ | 8476/10682 [1:12:02<18:06, 2.03it/s]
|
744 |
79%|ββββββββ | 8477/10682 [1:12:02<18:05, 2.03it/s]
|
745 |
79%|ββββββββ | 8478/10682 [1:12:03<18:05, 2.03it/s]
|
746 |
79%|ββββββββ | 8479/10682 [1:12:03<18:05, 2.03it/s]
|
747 |
79%|ββββββββ | 8480/10682 [1:12:04<18:04, 2.03it/s]
|
748 |
79%|ββββββββ | 8481/10682 [1:12:04<18:04, 2.03it/s]
|
749 |
79%|ββββββββ | 8482/10682 [1:12:05<18:02, 2.03it/s]
|
750 |
79%|ββββββββ | 8483/10682 [1:12:05<18:02, 2.03it/s]
|
751 |
79%|ββββββββ | 8484/10682 [1:12:06<18:01, 2.03it/s]
|
752 |
79%|ββββββββ | 8485/10682 [1:12:06<18:00, 2.03it/s]
|
753 |
79%|ββββββββ | 8486/10682 [1:12:07<18:00, 2.03it/s]
|
754 |
79%|ββββββββ | 8487/10682 [1:12:07<18:00, 2.03it/s]
|
755 |
79%|ββββββββ | 8488/10682 [1:12:08<18:00, 2.03it/s]
|
756 |
79%|ββββββββ | 8489/10682 [1:12:08<18:00, 2.03it/s]
|
757 |
79%|ββββββββ | 8490/10682 [1:12:09<17:59, 2.03it/s]
|
758 |
79%|ββββββββ | 8491/10682 [1:12:09<18:00, 2.03it/s]
|
759 |
79%|ββββββββ | 8492/10682 [1:12:10<17:59, 2.03it/s]
|
760 |
80%|ββββββββ | 8493/10682 [1:12:10<17:59, 2.03it/s]
|
761 |
80%|ββββββββ | 8494/10682 [1:12:11<17:57, 2.03it/s]
|
762 |
80%|ββββββββ | 8495/10682 [1:12:11<17:57, 2.03it/s]
|
763 |
80%|ββββββββ | 8496/10682 [1:12:12<17:56, 2.03it/s]
|
764 |
80%|ββββββββ | 8497/10682 [1:12:12<17:56, 2.03it/s]
|
765 |
80%|ββββββββ | 8498/10682 [1:12:13<17:55, 2.03it/s]
|
766 |
80%|ββββββββ | 8499/10682 [1:12:13<17:55, 2.03it/s]
|
767 |
80%|ββββββββ | 8500/10682 [1:12:13<17:53, 2.03it/s]
|
768 |
|
|
|
769 |
80%|ββββββββ | 8500/10682 [1:12:13<17:53, 2.03it/s]
|
770 |
80%|ββββββββ | 8501/10682 [1:12:14<17:56, 2.03it/s]
|
771 |
80%|ββββββββ | 8502/10682 [1:12:14<17:54, 2.03it/s]
|
772 |
80%|ββββββββ | 8503/10682 [1:12:15<17:53, 2.03it/s]
|
773 |
80%|ββββββββ | 8504/10682 [1:12:15<17:54, 2.03it/s]
|
774 |
80%|ββββββββ | 8505/10682 [1:12:16<17:52, 2.03it/s]
|
775 |
80%|ββββββββ | 8506/10682 [1:12:16<17:51, 2.03it/s]
|
776 |
80%|ββββββββ | 8507/10682 [1:12:17<17:51, 2.03it/s]
|
777 |
80%|ββββββββ | 8508/10682 [1:12:17<17:49, 2.03it/s]
|
778 |
80%|ββββββββ | 8509/10682 [1:12:18<17:50, 2.03it/s]
|
779 |
80%|ββββββββ | 8510/10682 [1:12:18<17:48, 2.03it/s]
|
780 |
80%|ββββββββ | 8511/10682 [1:12:19<17:48, 2.03it/s]
|
781 |
80%|ββββββββ | 8512/10682 [1:12:19<17:47, 2.03it/s]
|
782 |
80%|ββββββββ | 8513/10682 [1:12:20<17:47, 2.03it/s]
|
783 |
80%|ββββββββ | 8514/10682 [1:12:20<17:46, 2.03it/s]
|
784 |
80%|ββββββββ | 8515/10682 [1:12:21<17:46, 2.03it/s]
|
785 |
80%|ββββββββ | 8516/10682 [1:12:21<17:46, 2.03it/s]
|
786 |
80%|ββββββββ | 8517/10682 [1:12:22<17:45, 2.03it/s]
|
787 |
80%|ββββββββ | 8518/10682 [1:12:22<17:44, 2.03it/s]
|
788 |
80%|ββββββββ | 8519/10682 [1:12:23<17:44, 2.03it/s]
|
789 |
80%|ββββββββ | 8520/10682 [1:12:23<17:43, 2.03it/s]
|
790 |
80%|ββββββββ | 8521/10682 [1:12:24<17:43, 2.03it/s]
|
791 |
80%|ββββββββ | 8522/10682 [1:12:24<17:43, 2.03it/s]
|
792 |
80%|ββββββββ | 8523/10682 [1:12:25<17:41, 2.03it/s]
|
793 |
80%|ββββββββ | 8524/10682 [1:12:25<17:41, 2.03it/s]
|
794 |
80%|ββββββββ | 8525/10682 [1:12:26<17:41, 2.03it/s]
|
795 |
|
|
|
796 |
80%|ββββββββ | 8525/10682 [1:12:26<17:41, 2.03it/s]
|
797 |
80%|ββββββββ | 8526/10682 [1:12:26<17:42, 2.03it/s]
|
798 |
80%|ββββββββ | 8527/10682 [1:12:27<17:42, 2.03it/s]
|
799 |
80%|ββββββββ | 8528/10682 [1:12:27<17:40, 2.03it/s]
|
800 |
80%|ββββββββ | 8529/10682 [1:12:28<17:40, 2.03it/s]
|
801 |
80%|ββββββββ | 8530/10682 [1:12:28<17:39, 2.03it/s]
|
802 |
80%|ββββββββ | 8531/10682 [1:12:29<17:38, 2.03it/s]
|
803 |
80%|ββββββββ | 8532/10682 [1:12:29<17:39, 2.03it/s]
|
804 |
80%|ββββββββ | 8533/10682 [1:12:30<17:37, 2.03it/s]
|
805 |
80%|ββββββββ | 8534/10682 [1:12:30<17:36, 2.03it/s]
|
806 |
80%|ββββββββ | 8535/10682 [1:12:31<17:36, 2.03it/s]
|
807 |
80%|ββββββββ | 8536/10682 [1:12:31<17:36, 2.03it/s]
|
808 |
80%|ββββββββ | 8537/10682 [1:12:32<17:35, 2.03it/s]
|
809 |
80%|ββββββββ | 8538/10682 [1:12:32<17:35, 2.03it/s]
|
810 |
80%|ββββββββ | 8539/10682 [1:12:33<17:34, 2.03it/s]
|
811 |
80%|ββββββββ | 8540/10682 [1:12:33<17:34, 2.03it/s]
|
812 |
80%|ββββββββ | 8541/10682 [1:12:34<17:32, 2.03it/s]
|
813 |
80%|ββββββββ | 8542/10682 [1:12:34<17:32, 2.03it/s]
|
814 |
80%|ββββββββ | 8543/10682 [1:12:35<17:32, 2.03it/s]
|
815 |
80%|ββββββββ | 8544/10682 [1:12:35<17:31, 2.03it/s]
|
816 |
80%|ββββββββ | 8545/10682 [1:12:36<17:32, 2.03it/s]
|
817 |
80%|ββββββββ | 8546/10682 [1:12:36<17:31, 2.03it/s]
|
818 |
80%|ββββββββ | 8547/10682 [1:12:37<17:31, 2.03it/s]
|
819 |
80%|ββββββββ | 8548/10682 [1:12:37<17:31, 2.03it/s]
|
820 |
80%|ββββββββ | 8549/10682 [1:12:38<17:29, 2.03it/s]
|
821 |
80%|ββββββββ | 8550/10682 [1:12:38<17:31, 2.03it/s]{'loss': 2.8274, 'grad_norm': 0.271226167678833, 'learning_rate': 0.0001165346678316832, 'epoch': 11.21}
|
|
|
822 |
|
823 |
80%|ββββββββ | 8550/10682 [1:12:38<17:31, 2.03it/s]
|
824 |
80%|ββββββββ | 8551/10682 [1:12:39<17:32, 2.03it/s]
|
825 |
80%|ββββββββ | 8552/10682 [1:12:39<17:31, 2.03it/s]
|
826 |
80%|ββββββββ | 8553/10682 [1:12:40<17:30, 2.03it/s]
|
827 |
80%|ββββββββ | 8554/10682 [1:12:40<17:30, 2.03it/s]
|
828 |
80%|ββββββββ | 8555/10682 [1:12:41<17:29, 2.03it/s]
|
829 |
80%|ββββββββ | 8556/10682 [1:12:41<17:28, 2.03it/s]
|
830 |
80%|ββββββββ | 8557/10682 [1:12:42<17:27, 2.03it/s]
|
831 |
80%|ββββββββ | 8558/10682 [1:12:42<17:26, 2.03it/s]
|
832 |
80%|ββββββββ | 8559/10682 [1:12:43<17:27, 2.03it/s]
|
833 |
80%|ββββββββ | 8560/10682 [1:12:43<17:25, 2.03it/s]
|
834 |
80%|ββββββββ | 8561/10682 [1:12:44<17:25, 2.03it/s]
|
835 |
80%|ββββββββ | 8562/10682 [1:12:44<17:25, 2.03it/s]
|
836 |
80%|ββββββββ | 8563/10682 [1:12:45<17:23, 2.03it/s]
|
837 |
80%|ββββββββ | 8564/10682 [1:12:45<17:23, 2.03it/s]
|
838 |
80%|ββββββββ | 8565/10682 [1:12:46<17:22, 2.03it/s]
|
839 |
80%|ββββββββ | 8566/10682 [1:12:46<17:21, 2.03it/s]
|
840 |
80%|ββββββββ | 8567/10682 [1:12:46<17:21, 2.03it/s]
|
841 |
80%|ββββββββ | 8568/10682 [1:12:47<17:20, 2.03it/s]
|
842 |
80%|ββββββββ | 8569/10682 [1:12:47<17:20, 2.03it/s]
|
843 |
80%|ββββββββ | 8570/10682 [1:12:48<17:19, 2.03it/s]
|
844 |
80%|ββββββββ | 8571/10682 [1:12:48<17:18, 2.03it/s]
|
845 |
80%|ββββββββ | 8572/10682 [1:12:49<17:18, 2.03it/s]
|
846 |
80%|ββββββββ | 8573/10682 [1:12:49<17:17, 2.03it/s]
|
847 |
80%|ββββββββ | 8574/10682 [1:12:50<17:16, 2.03it/s]
|
848 |
80%|ββββββββ | 8575/10682 [1:12:50<17:16, 2.03it/s]{'loss': 2.8339, 'grad_norm': 0.2710798978805542, 'learning_rate': 0.00011392597484393285, 'epoch': 11.24}
|
849 |
|
|
|
850 |
80%|ββββββββ | 8575/10682 [1:12:50<17:16, 2.03it/s]
|
851 |
80%|ββββββββ | 8576/10682 [1:12:51<17:16, 2.03it/s]
|
852 |
80%|ββββββββ | 8577/10682 [1:12:51<17:16, 2.03it/s]
|
853 |
80%|ββββββββ | 8578/10682 [1:12:52<17:16, 2.03it/s]
|
854 |
80%|ββββββββ | 8579/10682 [1:12:52<17:15, 2.03it/s]
|
855 |
80%|ββββββββ | 8580/10682 [1:12:53<17:15, 2.03it/s]
|
856 |
80%|ββββββββ | 8581/10682 [1:12:53<17:13, 2.03it/s]
|
857 |
80%|ββββββββ | 8582/10682 [1:12:54<17:13, 2.03it/s]
|
858 |
80%|ββββββββ | 8583/10682 [1:12:54<17:13, 2.03it/s]
|
859 |
80%|ββββββββ | 8584/10682 [1:12:55<17:12, 2.03it/s]
|
860 |
80%|ββββββββ | 8585/10682 [1:12:55<17:13, 2.03it/s]
|
861 |
80%|ββββββββ | 8586/10682 [1:12:56<17:12, 2.03it/s]
|
862 |
80%|ββββββββ | 8587/10682 [1:12:56<17:10, 2.03it/s]
|
863 |
80%|ββββββββ | 8588/10682 [1:12:57<17:11, 2.03it/s]
|
864 |
80%|ββββββββ | 8589/10682 [1:12:57<17:09, 2.03it/s]
|
865 |
80%|ββββββββ | 8590/10682 [1:12:58<17:09, 2.03it/s]
|
866 |
80%|ββββββββ | 8591/10682 [1:12:58<17:10, 2.03it/s]
|
867 |
80%|ββββββββ | 8592/10682 [1:12:59<17:08, 2.03it/s]
|
868 |
80%|ββββββββ | 8593/10682 [1:12:59<17:09, 2.03it/s]
|
869 |
80%|ββββββββ | 8594/10682 [1:13:00<17:08, 2.03it/s]
|
870 |
80%|ββββββββ | 8595/10682 [1:13:00<17:08, 2.03it/s]
|
871 |
80%|ββββββββ | 8596/10682 [1:13:01<17:08, 2.03it/s]
|
872 |
80%|ββββββββ | 8597/10682 [1:13:01<17:06, 2.03it/s]
|
873 |
80%|ββββββββ | 8598/10682 [1:13:02<17:08, 2.03it/s]
|
874 |
80%|ββββββββ | 8599/10682 [1:13:02<17:06, 2.03it/s]
|
875 |
81%|ββββββββ | 8600/10682 [1:13:03<17:04, 2.03it/s]
|
876 |
|
|
|
877 |
81%|ββββββββ | 8600/10682 [1:13:03<17:04, 2.03it/s]
|
878 |
81%|ββββββββ | 8601/10682 [1:13:03<17:06, 2.03it/s]
|
879 |
81%|ββββββββ | 8602/10682 [1:13:04<17:05, 2.03it/s]
|
880 |
81%|ββββββββ | 8603/10682 [1:13:04<17:05, 2.03it/s]
|
881 |
81%|ββββββββ | 8604/10682 [1:13:05<17:04, 2.03it/s]
|
882 |
81%|ββββββββ | 8605/10682 [1:13:05<17:04, 2.03it/s]
|
883 |
81%|ββββββββ | 8606/10682 [1:13:06<17:03, 2.03it/s]
|
884 |
81%|ββββββββ | 8607/10682 [1:13:06<17:03, 2.03it/s]
|
885 |
81%|ββββββββ | 8608/10682 [1:13:07<17:02, 2.03it/s]
|
886 |
81%|ββββββββ | 8609/10682 [1:13:07<17:01, 2.03it/s]
|
887 |
81%|ββββββββ | 8610/10682 [1:13:08<17:02, 2.03it/s]
|
888 |
81%|ββββββββ | 8611/10682 [1:13:08<17:01, 2.03it/s]
|
889 |
81%|ββββββββ | 8612/10682 [1:13:09<17:01, 2.03it/s]
|
890 |
81%|ββββββββ | 8613/10682 [1:13:09<16:59, 2.03it/s]
|
891 |
81%|ββββββββ | 8614/10682 [1:13:10<16:59, 2.03it/s]
|
892 |
81%|ββββββββ | 8615/10682 [1:13:10<16:58, 2.03it/s]
|
893 |
81%|ββββββββ | 8616/10682 [1:13:11<16:57, 2.03it/s]
|
894 |
81%|ββββββββ | 8617/10682 [1:13:11<16:58, 2.03it/s]
|
895 |
81%|ββββββββ | 8618/10682 [1:13:12<16:56, 2.03it/s]
|
896 |
81%|ββββββββ | 8619/10682 [1:13:12<16:55, 2.03it/s]
|
897 |
81%|ββββββββ | 8620/10682 [1:13:13<16:55, 2.03it/s]
|
898 |
81%|ββββββββ | 8621/10682 [1:13:13<16:54, 2.03it/s]
|
899 |
81%|ββββββββ | 8622/10682 [1:13:14<16:54, 2.03it/s]
|
900 |
81%|ββββββββ | 8623/10682 [1:13:14<16:54, 2.03it/s]
|
901 |
81%|ββββββββ | 8624/10682 [1:13:15<16:53, 2.03it/s]
|
902 |
81%|ββββββββ | 8625/10682 [1:13:15<16:53, 2.03it/s]
|
903 |
|
|
|
904 |
81%|ββββββββ | 8625/10682 [1:13:15<16:53, 2.03it/s]
|
905 |
81%|ββββββββ | 8626/10682 [1:13:16<16:53, 2.03it/s]
|
906 |
81%|ββββββββ | 8627/10682 [1:13:16<16:53, 2.03it/s]
|
907 |
81%|ββββββββ | 8628/10682 [1:13:17<16:52, 2.03it/s]
|
908 |
81%|ββββββββ | 8629/10682 [1:13:17<16:52, 2.03it/s]
|
909 |
81%|ββββββββ | 8630/10682 [1:13:18<16:50, 2.03it/s]
|
910 |
81%|ββββββββ | 8631/10682 [1:13:18<16:50, 2.03it/s]
|
911 |
81%|ββββββββ | 8632/10682 [1:13:19<16:50, 2.03it/s]
|
912 |
81%|ββββββββ | 8633/10682 [1:13:19<16:49, 2.03it/s]
|
913 |
81%|ββββββββ | 8634/10682 [1:13:19<16:48, 2.03it/s]
|
914 |
81%|ββββββββ | 8635/10682 [1:13:20<16:48, 2.03it/s]
|
915 |
81%|ββββββββ | 8636/10682 [1:13:20<16:47, 2.03it/s]
|
916 |
81%|ββββββββ | 8637/10682 [1:13:21<16:47, 2.03it/s]
|
917 |
81%|ββββββββ | 8638/10682 [1:13:21<16:46, 2.03it/s]
|
918 |
81%|ββββββββ | 8639/10682 [1:13:22<16:45, 2.03it/s]
|
919 |
81%|ββββββββ | 8640/10682 [1:13:22<16:45, 2.03it/s]
|
920 |
81%|ββββββββ | 8641/10682 [1:13:23<16:44, 2.03it/s]
|
921 |
81%|ββββββββ | 8642/10682 [1:13:23<16:44, 2.03it/s]
|
922 |
81%|ββββββββ | 8643/10682 [1:13:24<16:43, 2.03it/s]
|
923 |
81%|ββββββββ | 8644/10682 [1:13:24<16:43, 2.03it/s]
|
924 |
81%|ββββββββ | 8645/10682 [1:13:25<16:44, 2.03it/s]
|
925 |
81%|ββββββββ | 8646/10682 [1:13:25<16:43, 2.03it/s]
|
926 |
81%|ββββββββ | 8647/10682 [1:13:26<16:44, 2.03it/s]
|
927 |
81%|ββββββββ | 8648/10682 [1:13:26<16:43, 2.03it/s]
|
928 |
81%|ββββββββ | 8649/10682 [1:13:27<16:42, 2.03it/s]
|
929 |
81%|ββββββββ | 8650/10682 [1:13:27<16:42, 2.03it/s]
|
930 |
{'loss': 2.8442, 'grad_norm': 0.2641572654247284, 'learning_rate': 0.00010625520936593375, 'epoch': 11.34}
|
|
|
931 |
81%|ββββββββ | 8650/10682 [1:13:27<16:42, 2.03it/s]
|
932 |
81%|ββββββββ | 8651/10682 [1:13:28<16:42, 2.03it/s]
|
933 |
81%|ββββββββ | 8652/10682 [1:13:28<16:41, 2.03it/s]
|
934 |
81%|ββββββββ | 8653/10682 [1:13:29<16:41, 2.03it/s]
|
935 |
81%|ββββββββ | 8654/10682 [1:13:29<16:40, 2.03it/s]
|
936 |
81%|ββββββββ | 8655/10682 [1:13:30<16:39, 2.03it/s]
|
937 |
81%|ββββββββ | 8656/10682 [1:13:30<16:38, 2.03it/s]
|
938 |
81%|ββββββββ | 8657/10682 [1:13:31<16:37, 2.03it/s]
|
939 |
81%|ββββββββ | 8658/10682 [1:13:31<16:36, 2.03it/s]
|
940 |
81%|ββββββββ | 8659/10682 [1:13:32<16:35, 2.03it/s]
|
941 |
81%|ββββββββ | 8660/10682 [1:13:32<16:35, 2.03it/s]
|
942 |
81%|ββββββββ | 8661/10682 [1:13:33<16:35, 2.03it/s]
|
943 |
81%|ββββββββ | 8662/10682 [1:13:33<16:34, 2.03it/s]
|
944 |
81%|ββββββββ | 8663/10682 [1:13:34<16:33, 2.03it/s]
|
945 |
81%|ββββββββ | 8664/10682 [1:13:34<16:33, 2.03it/s]
|
946 |
81%|ββββββββ | 8665/10682 [1:13:35<16:33, 2.03it/s]
|
947 |
81%|ββββββββ | 8666/10682 [1:13:35<16:33, 2.03it/s]
|
948 |
81%|ββββββββ | 8667/10682 [1:13:36<16:32, 2.03it/s]
|
949 |
81%|ββββββββ | 8668/10682 [1:13:36<16:31, 2.03it/s]
|
950 |
81%|ββββββββ | 8669/10682 [1:13:37<16:31, 2.03it/s]
|
951 |
81%|ββββββββ | 8670/10682 [1:13:37<16:31, 2.03it/s]
|
952 |
81%|ββββββββ | 8671/10682 [1:13:38<16:31, 2.03it/s]
|
953 |
81%|ββββββββ | 8672/10682 [1:13:38<16:30, 2.03it/s]
|
954 |
81%|ββββββββ | 8673/10682 [1:13:39<16:28, 2.03it/s]
|
955 |
81%|ββββββββ | 8674/10682 [1:13:39<16:29, 2.03it/s]
|
956 |
81%|ββββββββ | 8675/10682 [1:13:40<16:28, 2.03it/s]{'loss': 2.8423, 'grad_norm': 0.2601149380207062, 'learning_rate': 0.0001037506276287885, 'epoch': 11.37}
|
|
|
957 |
|
958 |
81%|ββββββββ | 8675/10682 [1:13:40<16:28, 2.03it/s]
|
959 |
81%|ββββββββ | 8676/10682 [1:13:40<16:29, 2.03it/s]
|
960 |
81%|ββββββββ | 8677/10682 [1:13:41<16:28, 2.03it/s]
|
961 |
81%|ββββββββ | 8678/10682 [1:13:41<16:28, 2.03it/s]
|
962 |
81%|ββββββββ | 8679/10682 [1:13:42<16:27, 2.03it/s]
|
963 |
81%|βββββββββ | 8680/10682 [1:13:42<16:26, 2.03it/s]
|
964 |
81%|βββββββββ | 8681/10682 [1:13:43<16:26, 2.03it/s]
|
965 |
81%|βββββββββ | 8682/10682 [1:13:43<16:24, 2.03it/s]
|
966 |
81%|βββββββββ | 8683/10682 [1:13:44<16:24, 2.03it/s]
|
967 |
81%|βββββββββ | 8684/10682 [1:13:44<16:23, 2.03it/s]
|
968 |
81%|βββββββββ | 8685/10682 [1:13:45<16:23, 2.03it/s]
|
969 |
81%|βββββββββ | 8686/10682 [1:13:45<16:23, 2.03it/s]
|
970 |
81%|βββββββββ | 8687/10682 [1:13:46<16:21, 2.03it/s]
|
971 |
81%|βββββββββ | 8688/10682 [1:13:46<16:21, 2.03it/s]
|
972 |
81%|βββββββββ | 8689/10682 [1:13:47<16:21, 2.03it/s]
|
973 |
81%|βββββββββ | 8690/10682 [1:13:47<16:19, 2.03it/s]
|
974 |
81%|βββββββββ | 8691/10682 [1:13:48<16:19, 2.03it/s]
|
975 |
81%|βββββββββ | 8692/10682 [1:13:48<16:18, 2.03it/s]
|
976 |
81%|βββββββββ | 8693/10682 [1:13:49<16:18, 2.03it/s]
|
977 |
81%|βββββββββ | 8694/10682 [1:13:49<16:18, 2.03it/s]
|
978 |
81%|βββββββββ | 8695/10682 [1:13:50<16:17, 2.03it/s]
|
979 |
81%|βββββββββ | 8696/10682 [1:13:50<16:18, 2.03it/s]
|
980 |
81%|βββββββββ | 8697/10682 [1:13:51<16:17, 2.03it/s]
|
981 |
81%|βββββββββ | 8698/10682 [1:13:51<16:16, 2.03it/s]
|
982 |
81%|βββββββββ | 8699/10682 [1:13:52<16:16, 2.03it/s]
|
983 |
81%|βββββββββ | 8700/10682 [1:13:52<16:15, 2.03it/s]{'loss': 2.8562, 'grad_norm': 0.2651384770870209, 'learning_rate': 0.0001012724960361826, 'epoch': 11.4}
|
|
|
984 |
|
985 |
81%|βββββββββ | 8700/10682 [1:13:52<16:15, 2.03it/s]
|
986 |
81%|βββββββββ | 8701/10682 [1:13:53<16:16, 2.03it/s]
|
987 |
81%|βββββββββ | 8702/10682 [1:13:53<16:14, 2.03it/s]
|
988 |
81%|βββββββββ | 8703/10682 [1:13:53<16:14, 2.03it/s]
|
989 |
81%|βββββββββ | 8704/10682 [1:13:54<16:14, 2.03it/s]
|
990 |
81%|βββββββββ | 8705/10682 [1:13:54<16:13, 2.03it/s]
|
991 |
82%|βββββββββ | 8706/10682 [1:13:55<16:13, 2.03it/s]
|
992 |
82%|βββββββββ | 8707/10682 [1:13:55<16:12, 2.03it/s]
|
993 |
82%|βββββββββ | 8708/10682 [1:13:56<16:10, 2.03it/s]
|
994 |
82%|βββββββββ | 8709/10682 [1:13:56<16:11, 2.03it/s]
|
995 |
82%|βββββββββ | 8710/10682 [1:13:57<16:10, 2.03it/s]
|
996 |
82%|βββββββββ | 8711/10682 [1:13:57<16:10, 2.03it/s]
|
997 |
82%|βββββββββ | 8712/10682 [1:13:58<16:09, 2.03it/s]
|
998 |
82%|βββββββββ | 8713/10682 [1:13:58<16:09, 2.03it/s]
|
999 |
82%|βββββββββ | 8714/10682 [1:13:59<16:09, 2.03it/s]
|
1000 |
82%|βββββββββ | 8715/10682 [1:13:59<16:08, 2.03it/s]
|
1001 |
82%|βββββββββ | 8716/10682 [1:14:00<16:08, 2.03it/s]
|
1002 |
82%|βββββββββ | 8717/10682 [1:14:00<16:08, 2.03it/s]
|
1003 |
82%|βββββββββ | 8718/10682 [1:14:01<16:06, 2.03it/s]
|
1004 |
82%|βββββββββ | 8719/10682 [1:14:01<16:06, 2.03it/s]
|
1005 |
82%|βββββββββ | 8720/10682 [1:14:02<16:05, 2.03it/s]
|
1006 |
82%|βββββββββ | 8721/10682 [1:14:02<16:04, 2.03it/s]
|
1007 |
82%|βββββββββ | 8722/10682 [1:14:03<16:05, 2.03it/s]
|
1008 |
82%|βββββββββ | 8723/10682 [1:14:03<16:04, 2.03it/s]
|
1009 |
82%|βββββββββ | 8724/10682 [1:14:04<16:04, 2.03it/s]
|
1010 |
82%|βββββββββ | 8725/10682 [1:14:04<16:03, 2.03it/s]{'loss': 2.8428, 'grad_norm': 0.26036229729652405, 'learning_rate': 9.882098000652034e-05, 'epoch': 11.44}
|
|
|
1011 |
|
1012 |
82%|βββββββββ | 8725/10682 [1:14:04<16:03, 2.03it/s]
|
1013 |
82%|βββββββββ | 8726/10682 [1:14:05<16:04, 2.03it/s]
|
1014 |
82%|βββββββββ | 8727/10682 [1:14:05<16:03, 2.03it/s]
|
1015 |
82%|βββββββββ | 8728/10682 [1:14:06<16:02, 2.03it/s]
|
1016 |
82%|βββββββββ | 8729/10682 [1:14:06<16:03, 2.03it/s]
|
1017 |
82%|βββββββββ | 8730/10682 [1:14:07<16:02, 2.03it/s]
|
1018 |
82%|βββββββββ | 8731/10682 [1:14:07<16:01, 2.03it/s]
|
1019 |
82%|βββββββββ | 8732/10682 [1:14:08<16:01, 2.03it/s]
|
1020 |
82%|βββββββββ | 8733/10682 [1:14:08<16:00, 2.03it/s]
|
1021 |
82%|βββββββββ | 8734/10682 [1:14:09<16:00, 2.03it/s]
|
1022 |
82%|βββββββββ | 8735/10682 [1:14:09<15:59, 2.03it/s]
|
1023 |
82%|βββββββββ | 8736/10682 [1:14:10<15:59, 2.03it/s]
|
1024 |
82%|βββββββββ | 8737/10682 [1:14:10<15:58, 2.03it/s]
|
1025 |
82%|βββββββββ | 8738/10682 [1:14:11<15:57, 2.03it/s]
|
1026 |
82%|βββββββββ | 8739/10682 [1:14:11<15:57, 2.03it/s]
|
1027 |
82%|βββββββββ | 8740/10682 [1:14:12<15:56, 2.03it/s]
|
1028 |
82%|βββββββββ | 8741/10682 [1:14:12<15:55, 2.03it/s]
|
1029 |
82%|βββββββββ | 8742/10682 [1:14:13<15:55, 2.03it/s]
|
1030 |
82%|βββββββββ | 8743/10682 [1:14:13<15:55, 2.03it/s]
|
1031 |
82%|βββββββββ | 8744/10682 [1:14:14<15:55, 2.03it/s]
|
1032 |
82%|βββββββββ | 8745/10682 [1:14:14<15:53, 2.03it/s]
|
1033 |
82%|βββββββββ | 8746/10682 [1:14:15<15:53, 2.03it/s]
|
1034 |
82%|βββββββββ | 8747/10682 [1:14:15<15:53, 2.03it/s]
|
1035 |
82%|βββββββββ | 8748/10682 [1:14:16<15:52, 2.03it/s]
|
1036 |
82%|βββββββββ | 8749/10682 [1:14:16<15:52, 2.03it/s]
|
1037 |
82%|βββββββββ | 8750/10682 [1:14:17<15:51, 2.03it/s]{'loss': 2.8425, 'grad_norm': 0.2622228264808655, 'learning_rate': 9.639624318158335e-05, 'epoch': 11.47}
|
|
|
1038 |
|
1039 |
82%|βββββββββ | 8750/10682 [1:14:17<15:51, 2.03it/s]
|
1040 |
82%|ββββββββοΏ½οΏ½ | 8751/10682 [1:14:17<15:52, 2.03it/s]
|
1041 |
82%|βββββββββ | 8752/10682 [1:14:18<15:51, 2.03it/s]
|
1042 |
82%|βββββββββ | 8753/10682 [1:14:18<15:50, 2.03it/s]
|
1043 |
82%|βββββββββ | 8754/10682 [1:14:19<15:49, 2.03it/s]
|
1044 |
82%|βββββββββ | 8755/10682 [1:14:19<15:48, 2.03it/s]
|
1045 |
82%|βββββββββ | 8756/10682 [1:14:20<15:48, 2.03it/s]
|
1046 |
82%|βββββββββ | 8757/10682 [1:14:20<15:47, 2.03it/s]
|
1047 |
82%|βββββββββ | 8758/10682 [1:14:21<15:46, 2.03it/s]
|
1048 |
82%|βββββββββ | 8759/10682 [1:14:21<15:45, 2.03it/s]
|
1049 |
82%|βββββββββ | 8760/10682 [1:14:22<15:45, 2.03it/s]
|
1050 |
82%|βββββββββ | 8761/10682 [1:14:22<15:44, 2.03it/s]
|
1051 |
82%|βββββββββ | 8762/10682 [1:14:23<15:45, 2.03it/s]
|
1052 |
82%|βββββββββ | 8763/10682 [1:14:23<15:44, 2.03it/s]
|
1053 |
82%|βββββββββ | 8764/10682 [1:14:24<15:44, 2.03it/s]
|
1054 |
82%|βββββββββ | 8765/10682 [1:14:24<15:44, 2.03it/s]
|
1055 |
82%|βββββββββ | 8766/10682 [1:14:25<15:43, 2.03it/s]
|
1056 |
82%|βββββββββ | 8767/10682 [1:14:25<15:43, 2.03it/s]
|
1057 |
82%|βββββββββ | 8768/10682 [1:14:25<15:42, 2.03it/s]
|
1058 |
82%|βββββββββ | 8769/10682 [1:14:26<15:40, 2.03it/s]
|
1059 |
82%|βββββββββ | 8770/10682 [1:14:26<15:41, 2.03it/s]
|
1060 |
82%|βββββββββ | 8771/10682 [1:14:27<15:40, 2.03it/s]
|
1061 |
82%|βββββββββ | 8772/10682 [1:14:27<15:40, 2.03it/s]
|
1062 |
82%|βββββββββ | 8773/10682 [1:14:28<15:40, 2.03it/s]
|
1063 |
82%|βββββββββ | 8774/10682 [1:14:28<15:38, 2.03it/s]
|
1064 |
82%|βββββββββ | 8775/10682 [1:14:29<15:39, 2.03it/s]{'loss': 2.8524, 'grad_norm': 0.26476430892944336, 'learning_rate': 9.399844741560781e-05, 'epoch': 11.5}
|
1065 |
|
|
|
1066 |
82%|βββββββββ | 8775/10682 [1:14:29<15:39, 2.03it/s]
|
1067 |
82%|βββββββββ | 8776/10682 [1:14:29<15:40, 2.03it/s]
|
1068 |
82%|βββββββββ | 8777/10682 [1:14:30<15:41, 2.02it/s]
|
1069 |
82%|βββββββββ | 8778/10682 [1:14:30<15:38, 2.03it/s]
|
1070 |
82%|βββββββββ | 8779/10682 [1:14:31<15:38, 2.03it/s]
|
1071 |
82%|βββββββββ | 8780/10682 [1:14:31<15:38, 2.03it/s]
|
1072 |
82%|βββββββββ | 8781/10682 [1:14:32<15:36, 2.03it/s]
|
1073 |
82%|βββββββββ | 8782/10682 [1:14:32<15:36, 2.03it/s]
|
1074 |
82%|βββββββββ | 8783/10682 [1:14:33<15:35, 2.03it/s]
|
1075 |
82%|βββββββββ | 8784/10682 [1:14:33<15:33, 2.03it/s]
|
1076 |
82%|βββββββββ | 8785/10682 [1:14:34<15:34, 2.03it/s]
|
1077 |
82%|βββββββββ | 8786/10682 [1:14:34<15:33, 2.03it/s]
|
1078 |
82%|βββββββββ | 8787/10682 [1:14:35<15:33, 2.03it/s]
|
1079 |
82%|βββββββββ | 8788/10682 [1:14:35<15:34, 2.03it/s]
|
1080 |
82%|βββββββββ | 8789/10682 [1:14:36<15:33, 2.03it/s]
|
1081 |
82%|βββββββββ | 8790/10682 [1:14:36<15:33, 2.03it/s]
|
1082 |
82%|βββββββββ | 8791/10682 [1:14:37<15:32, 2.03it/s]
|
1083 |
82%|βββββββββ | 8792/10682 [1:14:37<15:31, 2.03it/s]
|
1084 |
82%|βββββββββ | 8793/10682 [1:14:38<15:31, 2.03it/s]
|
1085 |
82%|βββββββββ | 8794/10682 [1:14:38<15:30, 2.03it/s]
|
1086 |
82%|βββββββββ | 8795/10682 [1:14:39<15:29, 2.03it/s]
|
1087 |
82%|βββββββββ | 8796/10682 [1:14:39<15:29, 2.03it/s]
|
1088 |
82%|βββββββββ | 8797/10682 [1:14:40<15:28, 2.03it/s]
|
1089 |
82%|βββββββββ | 8798/10682 [1:14:40<15:26, 2.03it/s]
|
1090 |
82%|βββββββββ | 8799/10682 [1:14:41<15:27, 2.03it/s]
|
1091 |
82%|βββββββββ | 8800/10682 [1:14:41<15:27, 2.03it/s]{'loss': 2.8376, 'grad_norm': 0.25995054841041565, 'learning_rate': 9.162775276448015e-05, 'epoch': 11.53}
|
|
|
1092 |
|
1093 |
82%|βββββββββ | 8800/10682 [1:14:41<15:27, 2.03it/s]
|
1094 |
82%|βββββββββ | 8801/10682 [1:14:42<15:28, 2.03it/s]
|
1095 |
82%|βββββββββ | 8802/10682 [1:14:42<15:26, 2.03it/s]
|
1096 |
82%|βββββββββ | 8803/10682 [1:14:43<15:24, 2.03it/s]
|
1097 |
82%|βββββββββ | 8804/10682 [1:14:43<15:23, 2.03it/s]
|
1098 |
82%|βββββββββ | 8805/10682 [1:14:44<15:23, 2.03it/s]
|
1099 |
82%|βββββββββ | 8806/10682 [1:14:44<15:23, 2.03it/s]
|
1100 |
82%|βββββββββ | 8807/10682 [1:14:45<15:22, 2.03it/s]
|
1101 |
82%|βββββββββ | 8808/10682 [1:14:45<15:22, 2.03it/s]
|
1102 |
82%|βββββββββ | 8809/10682 [1:14:46<15:22, 2.03it/s]
|
1103 |
82%|βββββββββ | 8810/10682 [1:14:46<15:21, 2.03it/s]
|
1104 |
82%|βββββββββ | 8811/10682 [1:14:47<15:21, 2.03it/s]
|
1105 |
82%|βββββββββ | 8812/10682 [1:14:47<15:21, 2.03it/s]
|
1106 |
83%|βββββββββ | 8813/10682 [1:14:48<15:20, 2.03it/s]
|
1107 |
83%|βββββββββ | 8814/10682 [1:14:48<15:20, 2.03it/s]
|
1108 |
83%|οΏ½οΏ½οΏ½ββββββββ | 8815/10682 [1:14:49<15:18, 2.03it/s]
|
1109 |
83%|βββββββββ | 8816/10682 [1:14:49<15:18, 2.03it/s]
|
1110 |
83%|βββββββββ | 8817/10682 [1:14:50<15:18, 2.03it/s]
|
1111 |
83%|βββββββββ | 8818/10682 [1:14:50<15:18, 2.03it/s]
|
1112 |
83%|βββββββββ | 8819/10682 [1:14:51<15:18, 2.03it/s]
|
1113 |
83%|βββββββββ | 8820/10682 [1:14:51<15:16, 2.03it/s]
|
1114 |
83%|βββββββββ | 8821/10682 [1:14:52<15:16, 2.03it/s]
|
1115 |
83%|βββββββββ | 8822/10682 [1:14:52<15:16, 2.03it/s]
|
1116 |
83%|βββββββββ | 8823/10682 [1:14:53<15:15, 2.03it/s]
|
1117 |
83%|βββββββββ | 8824/10682 [1:14:53<15:15, 2.03it/s]
|
1118 |
83%|βββββββββ | 8825/10682 [1:14:54<15:14, 2.03it/s]
|
1119 |
|
|
|
1120 |
83%|βββββββββ | 8825/10682 [1:14:54<15:14, 2.03it/s]
|
1121 |
83%|βββββββββ | 8826/10682 [1:14:54<15:15, 2.03it/s]
|
1122 |
83%|βββββββββ | 8827/10682 [1:14:55<15:14, 2.03it/s]
|
1123 |
83%|βββββββββ | 8828/10682 [1:14:55<15:13, 2.03it/s]
|
1124 |
83%|βββββββββ | 8829/10682 [1:14:56<16:34, 1.86it/s]
|
1125 |
83%|βββββββββ | 8830/10682 [1:14:56<16:09, 1.91it/s]
|
1126 |
83%|βββββββββ | 8831/10682 [1:14:57<15:51, 1.94it/s]
|
1127 |
83%|βββββββββ | 8832/10682 [1:14:57<15:38, 1.97it/s]
|
1128 |
83%|βββββββββ | 8833/10682 [1:14:58<15:30, 1.99it/s]
|
1129 |
83%|βββββββββ | 8834/10682 [1:14:58<15:23, 2.00it/s]
|
1130 |
83%|βββββββββ | 8835/10682 [1:14:59<15:19, 2.01it/s]
|
1131 |
83%|βββββββββ | 8836/10682 [1:14:59<15:15, 2.02it/s]
|
1132 |
83%|βββββββββ | 8837/10682 [1:15:00<15:13, 2.02it/s]
|
1133 |
83%|βββββββββ | 8838/10682 [1:15:00<15:11, 2.02it/s]
|
1134 |
83%|βββββββββ | 8839/10682 [1:15:01<15:09, 2.03it/s]
|
1135 |
83%|βββββββββ | 8840/10682 [1:15:01<15:09, 2.03it/s]
|
1136 |
83%|βββββββββ | 8841/10682 [1:15:02<15:07, 2.03it/s]
|
1137 |
83%|βββββββββ | 8842/10682 [1:15:02<15:06, 2.03it/s]
|
1138 |
83%|βββββββββ | 8843/10682 [1:15:03<15:06, 2.03it/s]
|
1139 |
83%|βββββββββ | 8844/10682 [1:15:03<15:05, 2.03it/s]
|
1140 |
83%|βββββββββ | 8845/10682 [1:15:04<15:05, 2.03it/s]
|
1141 |
83%|βββββββββ | 8846/10682 [1:15:04<15:04, 2.03it/s]
|
1142 |
83%|βββββββββ | 8847/10682 [1:15:05<15:04, 2.03it/s]
|
1143 |
83%|βββββββββ | 8848/10682 [1:15:05<15:03, 2.03it/s]
|
1144 |
83%|βββββββββ | 8849/10682 [1:15:06<15:02, 2.03it/s]
|
1145 |
83%|βββββββββ | 8850/10682 [1:15:06<15:02, 2.03it/s]{'loss': 2.8552, 'grad_norm': 0.2628968060016632, 'learning_rate': 8.6968297974584e-05, 'epoch': 11.6}
|
1146 |
|
|
|
1147 |
83%|βββββββββ | 8850/10682 [1:15:06<15:02, 2.03it/s]
|
1148 |
83%|βββββββββ | 8851/10682 [1:15:07<15:03, 2.03it/s]
|
1149 |
83%|βββββββββ | 8852/10682 [1:15:07<15:02, 2.03it/s]
|
1150 |
83%|βββββββββ | 8853/10682 [1:15:08<15:01, 2.03it/s]
|
1151 |
83%|βββββββββ | 8854/10682 [1:15:08<15:01, 2.03it/s]
|
1152 |
83%|βββββββββ | 8855/10682 [1:15:09<15:01, 2.03it/s]
|
1153 |
83%|βββββββββ | 8856/10682 [1:15:09<15:00, 2.03it/s]
|
1154 |
83%|βββββββββ | 8857/10682 [1:15:09<15:01, 2.03it/s]
|
1155 |
83%|βββββββββ | 8858/10682 [1:15:10<14:59, 2.03it/s]
|
1156 |
83%|βββββββββ | 8859/10682 [1:15:10<14:58, 2.03it/s]
|
1157 |
83%|βββββββββ | 8860/10682 [1:15:11<14:58, 2.03it/s]
|
1158 |
83%|βββββββββ | 8861/10682 [1:15:11<14:57, 2.03it/s]
|
1159 |
83%|βββββββββ | 8862/10682 [1:15:12<14:56, 2.03it/s]
|
1160 |
83%|βββββββββ | 8863/10682 [1:15:12<14:56, 2.03it/s]
|
1161 |
83%|βββββββββ | 8864/10682 [1:15:13<14:55, 2.03it/s]
|
1162 |
83%|βββββββββ | 8865/10682 [1:15:13<14:55, 2.03it/s]
|
1163 |
83%|βββββββββ | 8866/10682 [1:15:14<14:54, 2.03it/s]
|
1164 |
83%|βββββββββ | 8867/10682 [1:15:14<14:53, 2.03it/s]
|
1165 |
83%|βββββββββ | 8868/10682 [1:15:15<14:53, 2.03it/s]
|
1166 |
83%|βββββββββ | 8869/10682 [1:15:15<14:51, 2.03it/s]
|
1167 |
83%|βββββββββ | 8870/10682 [1:15:16<14:52, 2.03it/s]
|
1168 |
83%|βββββββββ | 8871/10682 [1:15:16<14:51, 2.03it/s]
|
1169 |
83%|βββββββββ | 8872/10682 [1:15:17<14:51, 2.03it/s]
|
1170 |
83%|βββββββββ | 8873/10682 [1:15:17<14:50, 2.03it/s]
|
1171 |
83%|βββββββββ | 8874/10682 [1:15:18<14:49, 2.03it/s]
|
1172 |
83%|βββββββββ | 8875/10682 [1:15:18<14:50, 2.03it/s]
|
1173 |
{'loss': 2.8535, 'grad_norm': 0.2602517902851105, 'learning_rate': 8.467984886028967e-05, 'epoch': 11.63}
|
|
|
1174 |
83%|βββββββββ | 8875/10682 [1:15:18<14:50, 2.03it/s]
|
1175 |
83%|βββββββββ | 8876/10682 [1:15:19<14:50, 2.03it/s]
|
1176 |
83%|βββββββββ | 8877/10682 [1:15:19<14:50, 2.03it/s]
|
1177 |
83%|βββββββββ | 8878/10682 [1:15:20<14:49, 2.03it/s]
|
1178 |
83%|βββββββββ | 8879/10682 [1:15:20<14:47, 2.03it/s]
|
1179 |
83%|βββββββββ | 8880/10682 [1:15:21<14:47, 2.03it/s]
|
1180 |
83%|βββββββββ | 8881/10682 [1:15:21<14:46, 2.03it/s]
|
1181 |
83%|βββββββββ | 8882/10682 [1:15:22<14:46, 2.03it/s]
|
1182 |
83%|βββββββββ | 8883/10682 [1:15:22<14:45, 2.03it/s]
|
1183 |
83%|βββββββββ | 8884/10682 [1:15:23<14:44, 2.03it/s]
|
1184 |
83%|βββββββββ | 8885/10682 [1:15:23<14:45, 2.03it/s]
|
1185 |
83%|βββββββββ | 8886/10682 [1:15:24<14:43, 2.03it/s]
|
1186 |
83%|βββββββββ | 8887/10682 [1:15:24<14:43, 2.03it/s]
|
1187 |
83%|βββββββββ | 8888/10682 [1:15:25<14:43, 2.03it/s]
|
1188 |
83%|βββββββββ | 8889/10682 [1:15:25<14:42, 2.03it/s]
|
1189 |
83%|βββββββββ | 8890/10682 [1:15:26<14:42, 2.03it/s]
|
1190 |
83%|βββββββββ | 8891/10682 [1:15:26<14:41, 2.03it/s]
|
1191 |
83%|βββββββββ | 8892/10682 [1:15:27<14:41, 2.03it/s]
|
1192 |
83%|βββββββββ | 8893/10682 [1:15:27<14:41, 2.03it/s]
|
1193 |
83%|βββββββββ | 8894/10682 [1:15:28<14:40, 2.03it/s]
|
1194 |
83%|βββββββββ | 8895/10682 [1:15:28<14:40, 2.03it/s]
|
1195 |
83%|βββββββββ | 8896/10682 [1:15:29<14:38, 2.03it/s]
|
1196 |
83%|βββββββββ | 8897/10682 [1:15:29<14:38, 2.03it/s]
|
1197 |
83%|βββββββββ | 8898/10682 [1:15:30<14:38, 2.03it/s]
|
1198 |
83%|βββββββββ | 8899/10682 [1:15:30<14:37, 2.03it/s]
|
1199 |
83%|βββββββββ | 8900/10682 [1:15:31<14:37, 2.03it/s]{'loss': 2.8613, 'grad_norm': 0.264787495136261, 'learning_rate': 8.24191228890303e-05, 'epoch': 11.66}
|
|
|
1200 |
|
1201 |
83%|βββββββββ | 8900/10682 [1:15:31<14:37, 2.03it/s]
|
1202 |
83%|βββββββββ | 8901/10682 [1:15:31<14:37, 2.03it/s]
|
1203 |
83%|βββββββββ | 8902/10682 [1:15:32<14:36, 2.03it/s]
|
1204 |
83%|βββββββββ | 8903/10682 [1:15:32<14:35, 2.03it/s]
|
1205 |
83%|βββββββββ | 8904/10682 [1:15:33<14:34, 2.03it/s]
|
1206 |
83%|βββββββββ | 8905/10682 [1:15:33<14:34, 2.03it/s]
|
1207 |
83%|βββββββββ | 8906/10682 [1:15:34<14:34, 2.03it/s]
|
1208 |
83%|βββββββββ | 8907/10682 [1:15:34<14:33, 2.03it/s]
|
1209 |
83%|βββββββββ | 8908/10682 [1:15:35<14:32, 2.03it/s]
|
1210 |
83%|βββββββββ | 8909/10682 [1:15:35<14:32, 2.03it/s]
|
1211 |
83%|βββββββββ | 8910/10682 [1:15:36<14:31, 2.03it/s]
|
1212 |
83%|βββββββββ | 8911/10682 [1:15:36<14:31, 2.03it/s]
|
1213 |
83%|βββββββββ | 8912/10682 [1:15:37<14:30, 2.03it/s]
|
1214 |
83%|βββββββββ | 8913/10682 [1:15:37<14:30, 2.03it/s]
|
1215 |
83%|βββββββββ | 8914/10682 [1:15:38<14:29, 2.03it/s]
|
1216 |
83%|βββββββββ | 8915/10682 [1:15:38<14:29, 2.03it/s]
|
1217 |
83%|βββββββββ | 8916/10682 [1:15:39<14:28, 2.03it/s]
|
1218 |
83%|βββββββββ | 8917/10682 [1:15:39<14:28, 2.03it/s]
|
1219 |
83%|βββββββββ | 8918/10682 [1:15:40<14:26, 2.03it/s]
|
1220 |
83%|βββββββββ | 8919/10682 [1:15:40<14:27, 2.03it/s]
|
1221 |
84%|βββββββββ | 8920/10682 [1:15:41<14:27, 2.03it/s]
|
1222 |
84%|βββββββββ | 8921/10682 [1:15:41<14:26, 2.03it/s]
|
1223 |
84%|βββββββββ | 8922/10682 [1:15:41<14:25, 2.03it/s]
|
1224 |
84%|βββββββββ | 8923/10682 [1:15:42<14:25, 2.03it/s]
|
1225 |
84%|βββββββββ | 8924/10682 [1:15:42<14:24, 2.03it/s]
|
1226 |
84%|βββββββββ | 8925/10682 [1:15:43<14:24, 2.03it/s]{'loss': 2.8574, 'grad_norm': 0.26087984442710876, 'learning_rate': 8.018627096711106e-05, 'epoch': 11.7}
|
1227 |
|
|
|
1228 |
84%|βββββββββ | 8925/10682 [1:15:43<14:24, 2.03it/s]
|
1229 |
84%|βββββββββ | 8926/10682 [1:15:43<14:25, 2.03it/s]
|
1230 |
84%|βββββββββ | 8927/10682 [1:15:44<14:24, 2.03it/s]
|
1231 |
84%|βββββββββ | 8928/10682 [1:15:44<14:25, 2.03it/s]
|
1232 |
84%|βββββββββ | 8929/10682 [1:15:45<14:24, 2.03it/s]
|
1233 |
84%|βββββββββ | 8930/10682 [1:15:45<14:23, 2.03it/s]
|
1234 |
84%|βββββββββ | 8931/10682 [1:15:46<14:23, 2.03it/s]
|
1235 |
84%|βββββββββ | 8932/10682 [1:15:47<15:39, 1.86it/s]
|
1236 |
84%|βββββββββ | 8933/10682 [1:15:47<15:15, 1.91it/s]
|
1237 |
84%|βββββββββ | 8934/10682 [1:15:48<14:58, 1.95it/s]
|
1238 |
84%|βββββββββ | 8935/10682 [1:15:48<14:46, 1.97it/s]
|
1239 |
84%|βββββββββ | 8936/10682 [1:15:49<14:38, 1.99it/s]
|
1240 |
84%|βββββββββ | 8937/10682 [1:15:49<14:32, 2.00it/s]
|
1241 |
84%|βββββββββ | 8938/10682 [1:15:50<14:27, 2.01it/s]
|
1242 |
84%|ββββοΏ½οΏ½ββββ | 8939/10682 [1:15:50<14:24, 2.02it/s]
|
1243 |
84%|βββββββββ | 8940/10682 [1:15:51<14:22, 2.02it/s]
|
1244 |
84%|βββββββββ | 8941/10682 [1:15:51<14:20, 2.02it/s]
|
1245 |
84%|βββββββββ | 8942/10682 [1:15:51<14:19, 2.03it/s]
|
1246 |
84%|βββββββββ | 8943/10682 [1:15:52<14:17, 2.03it/s]
|
1247 |
84%|βββββββββ | 8944/10682 [1:15:52<14:16, 2.03it/s]
|
1248 |
84%|βββββββββ | 8945/10682 [1:15:53<14:15, 2.03it/s]
|
1249 |
84%|βββββββββ | 8946/10682 [1:15:53<14:15, 2.03it/s]
|
1250 |
84%|βββββββββ | 8947/10682 [1:15:54<14:14, 2.03it/s]
|
1251 |
84%|βββββββββ | 8948/10682 [1:15:54<14:13, 2.03it/s]
|
1252 |
84%|βββββββββ | 8949/10682 [1:15:55<14:13, 2.03it/s]
|
1253 |
84%|βββββββββ | 8950/10682 [1:15:55<14:12, 2.03it/s]{'loss': 2.8577, 'grad_norm': 0.2632240951061249, 'learning_rate': 7.798144214020909e-05, 'epoch': 11.73}
|
1254 |
|
|
|
1255 |
84%|βββββββββ | 8950/10682 [1:15:55<14:12, 2.03it/s]
|
1256 |
84%|βββββββββ | 8951/10682 [1:15:56<14:14, 2.03it/s]
|
1257 |
84%|βββββββββ | 8952/10682 [1:15:56<14:12, 2.03it/s]
|
1258 |
84%|βββββββββ | 8953/10682 [1:15:57<14:12, 2.03it/s]
|
1259 |
84%|βββββββββ | 8954/10682 [1:15:57<14:11, 2.03it/s]
|
1260 |
84%|βββββββββ | 8955/10682 [1:15:58<14:10, 2.03it/s]
|
1261 |
84%|βββββββββ | 8956/10682 [1:15:58<14:10, 2.03it/s]
|
1262 |
84%|βββββββββ | 8957/10682 [1:15:59<14:10, 2.03it/s]
|
1263 |
84%|βββββββββ | 8958/10682 [1:15:59<14:07, 2.03it/s]
|
1264 |
84%|βββββββββ | 8959/10682 [1:16:00<14:07, 2.03it/s]
|
1265 |
84%|βββββββββ | 8960/10682 [1:16:00<14:08, 2.03it/s]
|
1266 |
84%|βββββββββ | 8961/10682 [1:16:01<14:08, 2.03it/s]
|
1267 |
84%|βββββββββ | 8962/10682 [1:16:01<14:07, 2.03it/s]
|
1268 |
84%|βββββββββ | 8963/10682 [1:16:02<14:07, 2.03it/s]
|
1269 |
84%|βββββββββ | 8964/10682 [1:16:02<14:06, 2.03it/s]
|
1270 |
84%|βββββββββ | 8965/10682 [1:16:03<14:05, 2.03it/s]
|
1271 |
84%|βββββββββ | 8966/10682 [1:16:03<14:05, 2.03it/s]
|
1272 |
84%|βββββββββ | 8967/10682 [1:16:04<14:05, 2.03it/s]
|
1273 |
84%|βββββββββ | 8968/10682 [1:16:04<14:05, 2.03it/s]
|
1274 |
84%|βββββββββ | 8969/10682 [1:16:05<14:04, 2.03it/s]
|
1275 |
84%|βββββββββ | 8970/10682 [1:16:05<14:03, 2.03it/s]
|
1276 |
84%|βββββββββ | 8971/10682 [1:16:06<14:03, 2.03it/s]
|
1277 |
84%|βββββββββ | 8972/10682 [1:16:06<14:02, 2.03it/s]
|
1278 |
84%|βββββββββ | 8973/10682 [1:16:07<14:02, 2.03it/s]
|
1279 |
84%|βββββββββ | 8974/10682 [1:16:07<14:01, 2.03it/s]
|
1280 |
84%|βββββββββ | 8975/10682 [1:16:08<14:00, 2.03it/s]
|
1281 |
|
|
|
1282 |
84%|βββββββββ | 8975/10682 [1:16:08<14:00, 2.03it/s]
|
1283 |
84%|βββββββββ | 8976/10682 [1:16:08<14:00, 2.03it/s]
|
1284 |
84%|βββββββββ | 8977/10682 [1:16:09<13:59, 2.03it/s]
|
1285 |
84%|βββββββββ | 8978/10682 [1:16:09<13:59, 2.03it/s]
|
1286 |
84%|βββββββββ | 8979/10682 [1:16:10<13:58, 2.03it/s]
|
1287 |
84%|βββββββββ | 8980/10682 [1:16:10<13:58, 2.03it/s]
|
1288 |
84%|βββββββββ | 8981/10682 [1:16:11<13:57, 2.03it/s]
|
1289 |
84%|βββββββββ | 8982/10682 [1:16:11<13:57, 2.03it/s]
|
1290 |
84%|βββββββββ | 8983/10682 [1:16:12<13:57, 2.03it/s]
|
1291 |
84%|βββββββββ | 8984/10682 [1:16:12<13:56, 2.03it/s]
|
1292 |
84%|βββββββββ | 8985/10682 [1:16:13<13:56, 2.03it/s]
|
1293 |
84%|βββββββββ | 8986/10682 [1:16:13<13:55, 2.03it/s]
|
1294 |
84%|βββββββββ | 8987/10682 [1:16:14<13:54, 2.03it/s]
|
1295 |
84%|βββββββββ | 8988/10682 [1:16:14<13:55, 2.03it/s]
|
1296 |
84%|βββββββββ | 8989/10682 [1:16:15<13:53, 2.03it/s]
|
1297 |
84%|βββββββββ | 8990/10682 [1:16:15<13:52, 2.03it/s]
|
1298 |
84%|βββββββββ | 8991/10682 [1:16:16<13:53, 2.03it/s]
|
1299 |
84%|βββββββββ | 8992/10682 [1:16:16<13:52, 2.03it/s]
|
1300 |
84%|βββββββββ | 8993/10682 [1:16:17<13:52, 2.03it/s]
|
1301 |
84%|βββββββββ | 8994/10682 [1:16:17<13:51, 2.03it/s]
|
1302 |
84%|βββββββββ | 8995/10682 [1:16:18<13:50, 2.03it/s]
|
1303 |
84%|βββββββββ | 8996/10682 [1:16:18<13:51, 2.03it/s]
|
1304 |
84%|βββββββββ | 8997/10682 [1:16:19<13:49, 2.03it/s]
|
1305 |
84%|βββββββββ | 8998/10682 [1:16:19<13:48, 2.03it/s]
|
1306 |
84%|βββββββββ | 8999/10682 [1:16:20<13:48, 2.03it/s]
|
1307 |
84%|βββββββββ | 9000/10682 [1:16:20<13:47, 2.03it/s]
|
1308 |
{'loss': 2.8481, 'grad_norm': 0.2617539167404175, 'learning_rate': 7.365644059145782e-05, 'epoch': 11.8}
|
|
|
1309 |
84%|βββββββββ | 9000/10682 [1:16:20<13:47, 2.03it/s]
|
1310 |
84%|βββββββββ | 9001/10682 [1:16:21<13:48, 2.03it/s]
|
1311 |
84%|βββββββββ | 9002/10682 [1:16:21<13:47, 2.03it/s]
|
1312 |
84%|βββββββββ | 9003/10682 [1:16:22<13:47, 2.03it/s]
|
1313 |
84%|βββββββββ | 9004/10682 [1:16:22<13:47, 2.03it/s]
|
1314 |
84%|βββββββββ | 9005/10682 [1:16:23<13:46, 2.03it/s]
|
1315 |
84%|βββββββββ | 9006/10682 [1:16:23<13:46, 2.03it/s]
|
1316 |
84%|βββββββββ | 9007/10682 [1:16:24<13:44, 2.03it/s]
|
1317 |
84%|βββββββββ | 9008/10682 [1:16:24<13:44, 2.03it/s]
|
1318 |
84%|βββββββββ | 9009/10682 [1:16:24<13:43, 2.03it/s]
|
1319 |
84%|βββββββββ | 9010/10682 [1:16:25<13:42, 2.03it/s]
|
1320 |
84%|βββββββββ | 9011/10682 [1:16:25<13:42, 2.03it/s]
|
1321 |
84%|βββββββββ | 9012/10682 [1:16:26<13:42, 2.03it/s]
|
1322 |
84%|βββββββββ | 9013/10682 [1:16:26<13:41, 2.03it/s]
|
1323 |
84%|βββββββββ | 9014/10682 [1:16:27<13:41, 2.03it/s]
|
1324 |
84%|βββββββββ | 9015/10682 [1:16:27<13:40, 2.03it/s]
|
1325 |
84%|βββββββββ | 9016/10682 [1:16:28<13:40, 2.03it/s]
|
1326 |
84%|βββββββββ | 9017/10682 [1:16:28<13:39, 2.03it/s]
|
1327 |
84%|βββββββββ | 9018/10682 [1:16:29<13:38, 2.03it/s]
|
1328 |
84%|βββββββββ | 9019/10682 [1:16:29<13:39, 2.03it/s]
|
1329 |
84%|βββββββββ | 9020/10682 [1:16:30<13:38, 2.03it/s]
|
1330 |
84%|βββββββββ | 9021/10682 [1:16:30<13:37, 2.03it/s]
|
1331 |
84%|βββββββββ | 9022/10682 [1:16:31<13:37, 2.03it/s]
|
1332 |
84%|βββββββββ | 9023/10682 [1:16:31<13:36, 2.03it/s]
|
1333 |
84%|βββββββββ | 9024/10682 [1:16:32<13:36, 2.03it/s]
|
1334 |
84%|βββββββββ | 9025/10682 [1:16:32<13:36, 2.03it/s]{'loss': 2.8558, 'grad_norm': 0.26141008734703064, 'learning_rate': 7.153655656890773e-05, 'epoch': 11.83}
|
|
|
1335 |
|
1336 |
84%|βββββββββ | 9025/10682 [1:16:32<13:36, 2.03it/s]
|
1337 |
84%|βββββββββ | 9026/10682 [1:16:33<13:37, 2.03it/s]
|
1338 |
85%|βββββββββ | 9027/10682 [1:16:33<13:36, 2.03it/s]
|
1339 |
85%|βββββββββ | 9028/10682 [1:16:34<13:36, 2.02it/s]
|
1340 |
85%|βββββββββ | 9029/10682 [1:16:34<13:35, 2.03it/s]
|
1341 |
85%|βββββββββ | 9030/10682 [1:16:35<13:34, 2.03it/s]
|
1342 |
85%|βββββββββ | 9031/10682 [1:16:35<13:33, 2.03it/s]
|
1343 |
85%|βββββββββ | 9032/10682 [1:16:36<13:32, 2.03it/s]
|
1344 |
85%|βββββββββ | 9033/10682 [1:16:36<13:32, 2.03it/s]
|
1345 |
85%|βββββββββ | 9034/10682 [1:16:37<13:31, 2.03it/s]
|
1346 |
85%|βββββββββ | 9035/10682 [1:16:37<13:31, 2.03it/s]
|
1347 |
85%|βββββββββ | 9036/10682 [1:16:38<13:32, 2.03it/s]
|
1348 |
85%|βββββββββ | 9037/10682 [1:16:38<13:30, 2.03it/s]
|
1349 |
85%|βββββββββ | 9038/10682 [1:16:39<13:30, 2.03it/s]
|
1350 |
85%|βββββββββ | 9039/10682 [1:16:39<13:29, 2.03it/s]
|
1351 |
85%|βββββββββ | 9040/10682 [1:16:40<13:29, 2.03it/s]
|
1352 |
85%|βββββββββ | 9041/10682 [1:16:40<13:28, 2.03it/s]
|
1353 |
85%|βββββββββ | 9042/10682 [1:16:41<13:27, 2.03it/s]
|
1354 |
85%|βββββββββ | 9043/10682 [1:16:41<13:27, 2.03it/s]
|
1355 |
85%|βββββββββ | 9044/10682 [1:16:42<13:27, 2.03it/s]
|
1356 |
85%|βββββββββ | 9045/10682 [1:16:42<13:26, 2.03it/s]
|
1357 |
85%|βββββββββ | 9046/10682 [1:16:43<13:25, 2.03it/s]
|
1358 |
85%|βββββββββ | 9047/10682 [1:16:43<13:25, 2.03it/s]
|
1359 |
85%|βββββββββ | 9048/10682 [1:16:44<13:26, 2.03it/s]
|
1360 |
85%|βββββββββ | 9049/10682 [1:16:44<13:24, 2.03it/s]
|
1361 |
85%|βββββββββ | 9050/10682 [1:16:45<13:24, 2.03it/s]{'loss': 2.8601, 'grad_norm': 0.2692682147026062, 'learning_rate': 6.94452730207023e-05, 'epoch': 11.86}
|
1362 |
|
|
|
1363 |
85%|βββββββββ | 9050/10682 [1:16:45<13:24, 2.03it/s]
|
1364 |
85%|βββββββββ | 9051/10682 [1:16:45<13:24, 2.03it/s]
|
1365 |
85%|βββββββββ | 9052/10682 [1:16:46<13:24, 2.03it/s]
|
1366 |
85%|βββββββββ | 9053/10682 [1:16:46<13:23, 2.03it/s]
|
1367 |
85%|βββββββββ | 9054/10682 [1:16:47<13:21, 2.03it/s]
|
1368 |
85%|βββββββββ | 9055/10682 [1:16:47<13:22, 2.03it/s]
|
1369 |
85%|βββββββββ | 9056/10682 [1:16:48<13:21, 2.03it/s]
|
1370 |
85%|βββββββββ | 9057/10682 [1:16:48<13:20, 2.03it/s]
|
1371 |
85%|βββββββββ | 9058/10682 [1:16:49<13:20, 2.03it/s]
|
1372 |
85%|βββββββββ | 9059/10682 [1:16:49<13:19, 2.03it/s]
|
1373 |
85%|βββββββββ | 9060/10682 [1:16:50<13:18, 2.03it/s]
|
1374 |
85%|βββββββββ | 9061/10682 [1:16:50<13:19, 2.03it/s]
|
1375 |
85%|βββββββββ | 9062/10682 [1:16:51<13:18, 2.03it/s]
|
1376 |
85%|βββββββοΏ½οΏ½β | 9063/10682 [1:16:51<13:17, 2.03it/s]
|
1377 |
85%|βββββββββ | 9064/10682 [1:16:52<13:18, 2.03it/s]
|
1378 |
85%|βββββββββ | 9065/10682 [1:16:52<13:17, 2.03it/s]
|
1379 |
85%|βββββββββ | 9066/10682 [1:16:53<13:17, 2.03it/s]
|
1380 |
85%|βββββββββ | 9067/10682 [1:16:53<13:16, 2.03it/s]
|
1381 |
85%|βββββββββ | 9068/10682 [1:16:54<13:15, 2.03it/s]
|
1382 |
85%|βββββββββ | 9069/10682 [1:16:54<13:15, 2.03it/s]
|
1383 |
85%|βββββββββ | 9070/10682 [1:16:55<13:14, 2.03it/s]
|
1384 |
85%|βββββββββ | 9071/10682 [1:16:55<13:13, 2.03it/s]
|
1385 |
85%|βββββββββ | 9072/10682 [1:16:56<13:14, 2.03it/s]
|
1386 |
85%|βββββββββ | 9073/10682 [1:16:56<13:13, 2.03it/s]
|
1387 |
85%|βββββββββ | 9074/10682 [1:16:57<13:12, 2.03it/s]
|
1388 |
85%|βββββββββ | 9075/10682 [1:16:57<13:11, 2.03it/s]
|
1389 |
{'loss': 2.8627, 'grad_norm': 0.2634871006011963, 'learning_rate': 6.738272954265156e-05, 'epoch': 11.89}
|
|
|
1390 |
85%|βββββββββ | 9075/10682 [1:16:57<13:11, 2.03it/s]
|
1391 |
85%|βββββββββ | 9076/10682 [1:16:58<13:12, 2.03it/s]
|
1392 |
85%|βββββββββ | 9077/10682 [1:16:58<13:11, 2.03it/s]
|
1393 |
85%|βββββββββ | 9078/10682 [1:16:59<13:10, 2.03it/s]
|
1394 |
85%|βββββββββ | 9079/10682 [1:16:59<13:09, 2.03it/s]
|
1395 |
85%|βββββββββ | 9080/10682 [1:16:59<13:09, 2.03it/s]
|
1396 |
85%|βββββββββ | 9081/10682 [1:17:00<13:09, 2.03it/s]
|
1397 |
85%|βββββββββ | 9082/10682 [1:17:00<13:07, 2.03it/s]
|
1398 |
85%|βββββββββ | 9083/10682 [1:17:01<13:06, 2.03it/s]
|
1399 |
85%|βββββββββ | 9084/10682 [1:17:01<13:07, 2.03it/s]
|
1400 |
85%|βββββββββ | 9085/10682 [1:17:02<13:05, 2.03it/s]
|
1401 |
85%|βββββββββ | 9086/10682 [1:17:02<13:05, 2.03it/s]
|
1402 |
85%|βββββββββ | 9087/10682 [1:17:03<13:05, 2.03it/s]
|
1403 |
85%|βββββββββ | 9088/10682 [1:17:03<13:04, 2.03it/s]
|
1404 |
85%|βββββββββ | 9089/10682 [1:17:04<13:04, 2.03it/s]
|
1405 |
85%|βββββββββ | 9090/10682 [1:17:04<13:03, 2.03it/s]
|
1406 |
85%|βββββββββ | 9091/10682 [1:17:05<13:03, 2.03it/s]
|
1407 |
85%|βββββββββ | 9092/10682 [1:17:05<13:02, 2.03it/s]
|
1408 |
85%|βββββββββ | 9093/10682 [1:17:06<13:01, 2.03it/s]
|
1409 |
85%|βββββββββ | 9094/10682 [1:17:06<13:02, 2.03it/s]
|
1410 |
85%|βββββββββ | 9095/10682 [1:17:07<13:00, 2.03it/s]
|
1411 |
85%|βββββββββ | 9096/10682 [1:17:07<13:01, 2.03it/s]
|
1412 |
85%|βββββββββ | 9097/10682 [1:17:08<13:00, 2.03it/s]
|
1413 |
85%|βββββββββ | 9098/10682 [1:17:08<12:59, 2.03it/s]
|
1414 |
85%|βββββββββ | 9099/10682 [1:17:09<12:59, 2.03it/s]
|
1415 |
85%|βββββββββ | 9100/10682 [1:17:09<12:58, 2.03it/s]{'loss': 2.8535, 'grad_norm': 0.26476970314979553, 'learning_rate': 6.534906381212979e-05, 'epoch': 11.93}
|
|
|
1416 |
|
1417 |
85%|βββββββββ | 9100/10682 [1:17:09<12:58, 2.03it/s]
|
1418 |
85%|βββββββββ | 9101/10682 [1:17:10<13:00, 2.03it/s]
|
1419 |
85%|βββββββββ | 9102/10682 [1:17:10<12:58, 2.03it/s]
|
1420 |
85%|βββββββββ | 9103/10682 [1:17:11<12:58, 2.03it/s]
|
1421 |
85%|βββββββββ | 9104/10682 [1:17:11<12:57, 2.03it/s]
|
1422 |
85%|βββββββββ | 9105/10682 [1:17:12<12:56, 2.03it/s]
|
1423 |
85%|βββββββββ | 9106/10682 [1:17:12<12:56, 2.03it/s]
|
1424 |
85%|βββββββββ | 9107/10682 [1:17:13<12:55, 2.03it/s]
|
1425 |
85%|βββββββββ | 9108/10682 [1:17:13<12:54, 2.03it/s]
|
1426 |
85%|βββββββββ | 9109/10682 [1:17:14<12:54, 2.03it/s]
|
1427 |
85%|βββββββββ | 9110/10682 [1:17:14<12:54, 2.03it/s]
|
1428 |
85%|βββββββββ | 9111/10682 [1:17:15<12:53, 2.03it/s]
|
1429 |
85%|βββββββββ | 9112/10682 [1:17:15<12:53, 2.03it/s]
|
1430 |
85%|βββββββββ | 9113/10682 [1:17:16<12:52, 2.03it/s]
|
1431 |
85%|βββββββββ | 9114/10682 [1:17:16<12:52, 2.03it/s]
|
1432 |
85%|βββββββββ | 9115/10682 [1:17:17<12:52, 2.03it/s]
|
1433 |
85%|βββββββββ | 9116/10682 [1:17:17<12:52, 2.03it/s]
|
1434 |
85%|βββββββββ | 9117/10682 [1:17:18<12:51, 2.03it/s]
|
1435 |
85%|βββββββββ | 9118/10682 [1:17:18<12:51, 2.03it/s]
|
1436 |
85%|βββββββββ | 9119/10682 [1:17:19<12:50, 2.03it/s]
|
1437 |
85%|βββββββββ | 9120/10682 [1:17:19<12:49, 2.03it/s]
|
1438 |
85%|βββββββββ | 9121/10682 [1:17:20<12:49, 2.03it/s]
|
1439 |
85%|βββββββββ | 9122/10682 [1:17:20<12:48, 2.03it/s]
|
1440 |
85%|βββββββββ | 9123/10682 [1:17:21<12:48, 2.03it/s]
|
1441 |
85%|βββββββββ | 9124/10682 [1:17:21<12:47, 2.03it/s]
|
1442 |
85%|βββββββββ | 9125/10682 [1:17:22<12:47, 2.03it/s]{'loss': 2.8536, 'grad_norm': 0.26291805505752563, 'learning_rate': 6.334441157888504e-05, 'epoch': 11.96}
|
|
|
1443 |
|
1444 |
85%|βββββββββ | 9125/10682 [1:17:22<12:47, 2.03it/s]
|
1445 |
85%|βββββββββ | 9126/10682 [1:17:22<12:47, 2.03it/s]
|
1446 |
85%|βββββββββ | 9127/10682 [1:17:23<12:46, 2.03it/s]
|
1447 |
85%|βββββββββ | 9128/10682 [1:17:23<12:45, 2.03it/s]
|
1448 |
85%|βββββββββ | 9129/10682 [1:17:24<12:45, 2.03it/s]
|
1449 |
85%|βββββββββ | 9130/10682 [1:17:24<12:44, 2.03it/s]
|
1450 |
85%|βββββββββ | 9131/10682 [1:17:25<12:44, 2.03it/s]
|
1451 |
85%|βββββββββ | 9132/10682 [1:17:25<12:43, 2.03it/s]
|
1452 |
85%|βββββββββ | 9133/10682 [1:17:26<12:43, 2.03it/s]
|
1453 |
86%|βββββββββ | 9134/10682 [1:17:26<12:42, 2.03it/s]
|
1454 |
86%|βββββββββ | 9135/10682 [1:17:27<12:41, 2.03it/s]
|
1455 |
86%|βββββββββ | 9136/10682 [1:17:27<12:41, 2.03it/s]
|
1456 |
86%|βββββββββ | 9137/10682 [1:17:28<12:40, 2.03it/s]
|
1457 |
86%|βββββββββ | 9138/10682 [1:17:28<12:40, 2.03it/s]
|
1458 |
86%|βββββββββ | 9139/10682 [1:17:29<12:40, 2.03it/s]
|
1459 |
86%|βββββββββ | 9140/10682 [1:17:29<12:38, 2.03it/s]
|
1460 |
86%|βββββββββ | 9141/10682 [1:17:30<12:38, 2.03it/s]
|
1461 |
86%|βββββββββ | 9142/10682 [1:17:30<12:38, 2.03it/s]
|
1462 |
86%|βββββββββ | 9143/10682 [1:17:31<12:37, 2.03it/s]
|
1463 |
86%|βββββββββ | 9144/10682 [1:17:31<12:37, 2.03it/s]
|
1464 |
86%|βββββββββ | 9145/10682 [1:17:32<12:36, 2.03it/s]
|
1465 |
86%|βββββββββ | 9146/10682 [1:17:32<12:35, 2.03it/s]
|
1466 |
86%|βββββββββ | 9147/10682 [1:17:32<12:36, 2.03it/s]
|
1467 |
86%|βββββββββ | 9148/10682 [1:17:33<12:35, 2.03it/s]
|
1468 |
86%|βββββββββ | 9149/10682 [1:17:33<12:35, 2.03it/s]
|
1469 |
86%|βββββββββ | 9150/10682 [1:17:34<12:34, 2.03it/s]{'loss': 2.8583, 'grad_norm': 0.2615031898021698, 'learning_rate': 6.1368906655978e-05, 'epoch': 11.99}
|
1470 |
|
|
|
1471 |
86%|βββββββββ | 9150/10682 [1:17:34<12:34, 2.03it/s]
|
1472 |
86%|βββββββββ | 9151/10682 [1:17:34<12:35, 2.03it/s]
|
1473 |
86%|βββββββββ | 9152/10682 [1:17:35<12:34, 2.03it/s]
|
1474 |
86%|βββββββββ | 9153/10682 [1:17:35<12:33, 2.03it/s]
|
1475 |
86%|βββββββββ | 9154/10682 [1:17:36<12:33, 2.03it/s]
|
1476 |
86%|βββββββββ | 9155/10682 [1:17:36<12:32, 2.03it/s]
|
1477 |
86%|βββββββββ | 9156/10682 [1:17:37<12:25, 2.05it/s]
|
1478 |
86%|βββββββββ | 9157/10682 [1:17:52<2:01:47, 4.79s/it]
|
1479 |
86%|βββββββββ | 9158/10682 [1:17:52<1:29:01, 3.50s/it]
|
1480 |
86%|βββββββββ | 9159/10682 [1:17:53<1:06:01, 2.60s/it]
|
1481 |
86%|βββββββββ | 9160/10682 [1:17:53<50:00, 1.97s/it]
|
1482 |
86%|βββββββββ | 9161/10682 [1:17:54<38:43, 1.53s/it]
|
1483 |
86%|βββββββββ | 9162/10682 [1:17:54<30:50, 1.22s/it]
|
1484 |
86%|βββββββββ | 9163/10682 [1:17:55<25:20, 1.00s/it]
|
1485 |
86%|βββββββββ | 9164/10682 [1:17:55<21:28, 1.18it/s]
|
1486 |
86%|βββββββββ | 9165/10682 [1:17:56<18:44, 1.35it/s]
|
1487 |
86%|βββββββββ | 9166/10682 [1:17:56<16:50, 1.50it/s]
|
1488 |
86%|βββββββββ | 9167/10682 [1:17:57<15:32, 1.63it/s]
|
1489 |
86%|βββββββββ | 9168/10682 [1:17:57<14:35, 1.73it/s]
|
1490 |
86%|βββββββββ | 9169/10682 [1:17:58<13:55, 1.81it/s]
|
1491 |
86%|βββββββββ | 9170/10682 [1:17:58<13:28, 1.87it/s]
|
1492 |
86%|βββββββββ | 9171/10682 [1:17:59<13:07, 1.92it/s]
|
1493 |
86%|βββββββββ | 9172/10682 [1:17:59<12:54, 1.95it/s]
|
1494 |
86%|βββββββββ | 9173/10682 [1:18:00<12:45, 1.97it/s]
|
1495 |
86%|βββββββββ | 9174/10682 [1:18:00<12:38, 1.99it/s]
|
1496 |
86%|βββββββββ | 9175/10682 [1:18:01<12:32, 2.00it/s]
|
1497 |
|
|
|
1498 |
86%|βββββββββ | 9175/10682 [1:18:01<12:32, 2.00it/s]
|
1499 |
86%|βββββββββ | 9176/10682 [1:18:01<12:30, 2.01it/s]
|
1500 |
86%|βββββββββ | 9177/10682 [1:18:02<12:26, 2.01it/s]
|
1501 |
86%|βββββββββ | 9178/10682 [1:18:02<12:24, 2.02it/s]
|
1502 |
86%|βββββββββ | 9179/10682 [1:18:03<12:23, 2.02it/s]
|
1503 |
86%|βββββββββ | 9180/10682 [1:18:03<12:22, 2.02it/s]
|
1504 |
86%|βββββββββ | 9181/10682 [1:18:04<12:21, 2.02it/s]
|
1505 |
86%|βββββββββ | 9182/10682 [1:18:04<12:20, 2.03it/s]
|
1506 |
86%|βββββββββ | 9183/10682 [1:18:05<12:19, 2.03it/s]
|
1507 |
86%|βββββββββ | 9184/10682 [1:18:05<12:19, 2.03it/s]
|
1508 |
86%|βββββββββ | 9185/10682 [1:18:06<12:19, 2.03it/s]
|
1509 |
86%|βββββββββ | 9186/10682 [1:18:06<12:17, 2.03it/s]
|
1510 |
86%|βββββββββ | 9187/10682 [1:18:07<12:15, 2.03it/s]
|
1511 |
86%|βββββββββ | 9188/10682 [1:18:07<12:15, 2.03it/s]
|
|
|
557 |
|
558 |
78%|ββββββββ | 8325/10682 [1:10:33<19:19, 2.03it/s]
|
559 |
78%|ββββββββ | 8326/10682 [1:10:33<19:21, 2.03it/s]
|
560 |
78%|ββββββββ | 8327/10682 [1:10:34<19:21, 2.03it/s]
|
561 |
78%|ββββββββ | 8328/10682 [1:10:34<19:20, 2.03it/s]
|
562 |
78%|ββββββββ | 8329/10682 [1:10:35<19:18, 2.03it/s]
|
563 |
78%|ββββββββ | 8330/10682 [1:10:35<19:17, 2.03it/s]
|
564 |
78%|ββββββββ | 8331/10682 [1:10:36<19:18, 2.03it/s]
|
565 |
78%|ββββββββ | 8332/10682 [1:10:36<19:16, 2.03it/s]
|
566 |
78%|ββββββββ | 8333/10682 [1:10:37<19:16, 2.03it/s]
|
567 |
78%|ββββββββ | 8334/10682 [1:10:37<19:16, 2.03it/s]
|
568 |
78%|ββββββββ | 8335/10682 [1:10:38<19:15, 2.03it/s]
|
569 |
78%|ββββββββ | 8336/10682 [1:10:38<19:15, 2.03it/s]
|
570 |
78%|ββββββββ | 8337/10682 [1:10:39<19:13, 2.03it/s]
|
571 |
78%|ββββββββ | 8338/10682 [1:10:39<19:13, 2.03it/s]
|
572 |
78%|ββββββββ | 8339/10682 [1:10:40<19:13, 2.03it/s]
|
573 |
78%|ββββββββ | 8340/10682 [1:10:40<19:12, 2.03it/s]
|
574 |
78%|ββββββββ | 8341/10682 [1:10:41<19:11, 2.03it/s]
|
575 |
78%|ββββββββ | 8342/10682 [1:10:41<19:12, 2.03it/s]
|
576 |
78%|ββββββββ | 8343/10682 [1:10:42<19:13, 2.03it/s]
|
577 |
78%|ββββββββ | 8344/10682 [1:10:42<19:12, 2.03it/s]
|
578 |
78%|ββββββββ | 8345/10682 [1:10:43<19:10, 2.03it/s]
|
579 |
78%|ββββββββ | 8346/10682 [1:10:43<19:09, 2.03it/s]
|
580 |
78%|ββββββββ | 8347/10682 [1:10:44<19:10, 2.03it/s]
|
581 |
78%|ββββββββ | 8348/10682 [1:10:44<19:09, 2.03it/s]
|
582 |
78%|ββββββββ | 8349/10682 [1:10:45<19:07, 2.03it/s]
|
583 |
78%|ββββββββ | 8350/10682 [1:10:45<19:07, 2.03it/s]{'loss': 2.9119, 'grad_norm': 0.256185919046402, 'learning_rate': 0.00013831071326327282, 'epoch': 10.94}
|
584 |
|
585 |
|
586 |
78%|ββββββββ | 8350/10682 [1:10:45<19:07, 2.03it/s]
|
587 |
78%|ββββββββ | 8351/10682 [1:10:46<19:08, 2.03it/s]
|
588 |
78%|ββββββββ | 8352/10682 [1:10:46<19:09, 2.03it/s]
|
589 |
78%|ββββββββ | 8353/10682 [1:10:47<19:07, 2.03it/s]
|
590 |
78%|ββββββββ | 8354/10682 [1:10:47<19:08, 2.03it/s]
|
591 |
78%|ββββββββ | 8355/10682 [1:10:48<19:07, 2.03it/s]
|
592 |
78%|ββββββββ | 8356/10682 [1:10:48<19:06, 2.03it/s]
|
593 |
78%|ββββββββ | 8357/10682 [1:10:49<19:06, 2.03it/s]
|
594 |
78%|ββββββββ | 8358/10682 [1:10:49<19:04, 2.03it/s]
|
595 |
78%|ββββββββ | 8359/10682 [1:10:50<19:04, 2.03it/s]
|
596 |
78%|ββββββββ | 8360/10682 [1:10:50<19:03, 2.03it/s]
|
597 |
78%|ββββββββ | 8361/10682 [1:10:51<19:02, 2.03it/s]
|
598 |
78%|ββββββββ | 8362/10682 [1:10:51<19:03, 2.03it/s]
|
599 |
78%|ββββββββ | 8363/10682 [1:10:52<19:01, 2.03it/s]
|
600 |
78%|ββββββββ | 8364/10682 [1:10:52<19:00, 2.03it/s]
|
601 |
78%|ββββββββ | 8365/10682 [1:10:53<19:00, 2.03it/s]
|
602 |
78%|ββββββββ | 8366/10682 [1:10:53<18:59, 2.03it/s]
|
603 |
78%|ββββββββ | 8367/10682 [1:10:54<18:58, 2.03it/s]
|
604 |
78%|ββββββββ | 8368/10682 [1:10:54<18:58, 2.03it/s]
|
605 |
78%|ββββββββ | 8369/10682 [1:10:55<18:57, 2.03it/s]
|
606 |
78%|ββββββββ | 8370/10682 [1:10:55<18:59, 2.03it/s]
|
607 |
78%|ββββββββ | 8371/10682 [1:10:56<18:57, 2.03it/s]
|
608 |
78%|ββββββββ | 8372/10682 [1:10:56<18:58, 2.03it/s]
|
609 |
78%|ββββββββ | 8373/10682 [1:10:57<18:57, 2.03it/s]
|
610 |
78%|ββββββββ | 8374/10682 [1:10:57<18:56, 2.03it/s]
|
611 |
78%|ββββββββ | 8375/10682 [1:10:58<18:57, 2.03it/s]{'loss': 2.9219, 'grad_norm': 0.2577957808971405, 'learning_rate': 0.0001355022636953933, 'epoch': 10.98}
|
612 |
|
613 |
|
614 |
78%|ββββββββ | 8375/10682 [1:10:58<18:57, 2.03it/s]
|
615 |
78%|ββββββββ | 8376/10682 [1:10:58<18:57, 2.03it/s]
|
616 |
78%|ββββββββ | 8377/10682 [1:10:59<18:58, 2.03it/s]
|
617 |
78%|ββββββββ | 8378/10682 [1:10:59<18:57, 2.03it/s]
|
618 |
78%|ββββββββ | 8379/10682 [1:11:00<18:56, 2.03it/s]
|
619 |
78%|ββββββββ | 8380/10682 [1:11:00<18:55, 2.03it/s]
|
620 |
78%|ββββββββ | 8381/10682 [1:11:01<18:54, 2.03it/s]
|
621 |
78%|ββββββββ | 8382/10682 [1:11:01<18:54, 2.03it/s]
|
622 |
78%|ββββββββ | 8383/10682 [1:11:02<18:53, 2.03it/s]
|
623 |
78%|ββββββββ | 8384/10682 [1:11:02<18:52, 2.03it/s]
|
624 |
78%|ββββββββ | 8385/10682 [1:11:03<18:51, 2.03it/s]
|
625 |
79%|ββββββββ | 8386/10682 [1:11:03<18:51, 2.03it/s]
|
626 |
79%|ββββββββ | 8387/10682 [1:11:04<18:51, 2.03it/s]
|
627 |
79%|ββββββββ | 8388/10682 [1:11:04<18:49, 2.03it/s]
|
628 |
79%|ββββββββ | 8389/10682 [1:11:05<18:48, 2.03it/s]
|
629 |
79%|ββββββββ | 8390/10682 [1:11:05<18:48, 2.03it/s]
|
630 |
79%|ββββββββ | 8391/10682 [1:11:06<18:47, 2.03it/s]
|
631 |
79%|ββββββββ | 8392/10682 [1:11:06<18:49, 2.03it/s]
|
632 |
79%|ββββββββ | 8393/10682 [1:11:06<18:36, 2.05it/s]
|
633 |
79%|ββββββββ | 8394/10682 [1:11:21<3:01:51, 4.77s/it]
|
634 |
79%|ββββββββ | 8395/10682 [1:11:22<2:12:51, 3.49s/it]
|
635 |
79%|ββββββββ | 8396/10682 [1:11:22<1:38:43, 2.59s/it]
|
636 |
79%|ββββββββ | 8397/10682 [1:11:23<1:14:42, 1.96s/it]
|
637 |
79%|ββββββββ | 8398/10682 [1:11:23<57:59, 1.52s/it]
|
638 |
79%|ββββββββ | 8399/10682 [1:11:24<46:11, 1.21s/it]
|
639 |
79%|ββββββββ | 8400/10682 [1:11:24<37:57, 1.00it/s]
|
640 |
|
|
|
641 |
79%|ββββββββ | 8400/10682 [1:11:24<37:57, 1.00it/s]
|
642 |
79%|ββββββββ | 8401/10682 [1:11:25<32:22, 1.17it/s]
|
643 |
79%|ββββββββ | 8402/10682 [1:11:25<28:16, 1.34it/s]
|
644 |
79%|ββββββββ | 8403/10682 [1:11:26<25:23, 1.50it/s]
|
645 |
79%|ββββββββ | 8404/10682 [1:11:26<23:24, 1.62it/s]
|
646 |
79%|ββββββββ | 8405/10682 [1:11:27<21:59, 1.73it/s]
|
647 |
79%|ββββββββ | 8406/10682 [1:11:27<20:58, 1.81it/s]
|
648 |
79%|ββββββββ | 8407/10682 [1:11:28<20:16, 1.87it/s]
|
649 |
79%|ββββββββ | 8408/10682 [1:11:28<19:46, 1.92it/s]
|
650 |
79%|ββββββββ | 8409/10682 [1:11:29<19:26, 1.95it/s]
|
651 |
79%|ββββββββ | 8410/10682 [1:11:29<19:11, 1.97it/s]
|
652 |
79%|ββββββββ | 8411/10682 [1:11:30<19:04, 1.98it/s]
|
653 |
79%|ββββββββ | 8412/10682 [1:11:30<18:54, 2.00it/s]
|
654 |
79%|ββββββββ | 8413/10682 [1:11:31<18:48, 2.01it/s]
|
655 |
79%|ββββββββ | 8414/10682 [1:11:31<18:46, 2.01it/s]
|
656 |
79%|ββββββββ | 8415/10682 [1:11:32<18:43, 2.02it/s]
|
657 |
79%|ββββββββ | 8416/10682 [1:11:32<18:41, 2.02it/s]
|
658 |
79%|ββββββββ | 8417/10682 [1:11:33<18:38, 2.02it/s]
|
659 |
79%|ββββββββ | 8418/10682 [1:11:33<18:38, 2.02it/s]
|
660 |
+
|
661 |
79%|ββββββββ | 8400/10682 [1:11:24<37:57, 1.00it/s]
|
662 |
79%|ββββββββ | 8401/10682 [1:11:25<32:22, 1.17it/s]
|
663 |
79%|ββββββββ | 8402/10682 [1:11:25<28:16, 1.34it/s]
|
664 |
79%|ββββββββ | 8403/10682 [1:11:26<25:23, 1.50it/s]
|
665 |
79%|ββββββββ | 8404/10682 [1:11:26<23:24, 1.62it/s]
|
666 |
79%|ββββββββ | 8405/10682 [1:11:27<21:59, 1.73it/s]
|
667 |
79%|ββββββββ | 8406/10682 [1:11:27<20:58, 1.81it/s]
|
668 |
79%|ββββββββ | 8407/10682 [1:11:28<20:16, 1.87it/s]
|
669 |
79%|ββββββββ | 8408/10682 [1:11:28<19:46, 1.92it/s]
|
670 |
79%|ββββββββ | 8409/10682 [1:11:29<19:26, 1.95it/s]
|
671 |
79%|ββββββββ | 8410/10682 [1:11:29<19:11, 1.97it/s]
|
672 |
79%|ββββββββ | 8411/10682 [1:11:30<19:04, 1.98it/s]
|
673 |
79%|ββββββββ | 8412/10682 [1:11:30<18:54, 2.00it/s]
|
674 |
79%|ββββββββ | 8413/10682 [1:11:31<18:48, 2.01it/s]
|
675 |
79%|ββββββββ | 8414/10682 [1:11:31<18:46, 2.01it/s]
|
676 |
79%|ββββββββ | 8415/10682 [1:11:32<18:43, 2.02it/s]
|
677 |
79%|ββββββββ | 8416/10682 [1:11:32<18:41, 2.02it/s]
|
678 |
79%|ββββββββ | 8417/10682 [1:11:33<18:38, 2.02it/s]
|
679 |
79%|ββββββββ | 8418/10682 [1:11:33<18:38, 2.02it/s]
|
680 |
79%|ββββββββ | 8419/10682 [1:11:34<18:36, 2.03it/s]
|
681 |
79%|ββββββββ | 8420/10682 [1:11:34<18:36, 2.03it/s]
|
682 |
79%|ββββββββ | 8421/10682 [1:11:35<18:35, 2.03it/s]
|
683 |
79%|ββββββββ | 8422/10682 [1:11:35<18:33, 2.03it/s]
|
684 |
79%|ββββββββ | 8423/10682 [1:11:36<18:33, 2.03it/s]
|
685 |
79%|ββββββββ | 8424/10682 [1:11:36<18:32, 2.03it/s]
|
686 |
79%|ββββββββ | 8425/10682 [1:11:37<18:32, 2.03it/s]
|
687 |
|
688 |
+
|
689 |
79%|ββββββββ | 8425/10682 [1:11:37<18:32, 2.03it/s]
|
690 |
79%|ββββββββ | 8426/10682 [1:11:37<18:32, 2.03it/s]
|
691 |
79%|ββββββββ | 8427/10682 [1:11:38<18:30, 2.03it/s]
|
692 |
79%|ββββββββ | 8428/10682 [1:11:38<18:30, 2.03it/s]
|
693 |
79%|ββββββββ | 8429/10682 [1:11:39<18:29, 2.03it/s]
|
694 |
79%|ββββββββ | 8430/10682 [1:11:39<18:30, 2.03it/s]
|
695 |
79%|ββββββββ | 8431/10682 [1:11:40<18:29, 2.03it/s]
|
696 |
79%|ββββββββ | 8432/10682 [1:11:40<18:29, 2.03it/s]
|
697 |
79%|ββββββββ | 8433/10682 [1:11:40<18:28, 2.03it/s]
|
698 |
79%|ββββββββ | 8434/10682 [1:11:41<18:27, 2.03it/s]
|
699 |
79%|ββββββββ | 8435/10682 [1:11:41<18:28, 2.03it/s]
|
700 |
79%|ββββββββ | 8436/10682 [1:11:42<18:27, 2.03it/s]
|
701 |
79%|ββββββββ | 8437/10682 [1:11:42<18:27, 2.03it/s]
|
702 |
79%|ββββββββ | 8438/10682 [1:11:43<18:26, 2.03it/s]
|
703 |
79%|ββββββββ | 8439/10682 [1:11:43<18:23, 2.03it/s]
|
704 |
79%|ββββββββ | 8440/10682 [1:11:44<18:24, 2.03it/s]
|
705 |
79%|ββββββββ | 8441/10682 [1:11:44<18:23, 2.03it/s]
|
706 |
79%|ββββββββ | 8442/10682 [1:11:45<18:21, 2.03it/s]
|
707 |
79%|ββββββββ | 8443/10682 [1:11:45<18:22, 2.03it/s]
|
708 |
79%|ββββββββ | 8444/10682 [1:11:46<18:20, 2.03it/s]
|
709 |
79%|ββββββββ | 8445/10682 [1:11:46<18:20, 2.03it/s]
|
710 |
79%|ββββββββ | 8446/10682 [1:11:47<18:20, 2.03it/s]
|
711 |
79%|ββββββββ | 8447/10682 [1:11:47<18:19, 2.03it/s]
|
712 |
79%|ββββββββ | 8448/10682 [1:11:48<18:21, 2.03it/s]
|
713 |
79%|ββββββββ | 8449/10682 [1:11:48<18:21, 2.03it/s]
|
714 |
79%|ββββββββ | 8450/10682 [1:11:49<18:20, 2.03it/s]
|
715 |
|
716 |
+
|
717 |
79%|ββββββββ | 8450/10682 [1:11:49<18:20, 2.03it/s]
|
718 |
79%|ββββββββ | 8451/10682 [1:11:49<18:21, 2.03it/s]
|
719 |
79%|ββββββββ | 8452/10682 [1:11:50<18:20, 2.03it/s]
|
720 |
79%|ββββββββ | 8453/10682 [1:11:50<18:19, 2.03it/s]
|
721 |
79%|ββββββββ | 8454/10682 [1:11:51<18:18, 2.03it/s]
|
722 |
79%|ββββββββ | 8455/10682 [1:11:51<18:17, 2.03it/s]
|
723 |
79%|ββββββββ | 8456/10682 [1:11:52<18:17, 2.03it/s]
|
724 |
79%|ββββββββ | 8457/10682 [1:11:52<18:15, 2.03it/s]
|
725 |
79%|ββββββββ | 8458/10682 [1:11:53<18:15, 2.03it/s]
|
726 |
79%|ββββββββ | 8459/10682 [1:11:53<18:15, 2.03it/s]
|
727 |
79%|ββββββββ | 8460/10682 [1:11:54<18:15, 2.03it/s]
|
728 |
79%|ββββββββ | 8461/10682 [1:11:54<18:14, 2.03it/s]
|
729 |
79%|ββββββββ | 8462/10682 [1:11:55<18:13, 2.03it/s]
|
730 |
79%|ββββββββ | 8463/10682 [1:11:55<18:13, 2.03it/s]
|
731 |
79%|ββββββββ | 8464/10682 [1:11:56<18:12, 2.03it/s]
|
732 |
79%|ββββββββ | 8465/10682 [1:11:56<18:11, 2.03it/s]
|
733 |
79%|ββββββββ | 8466/10682 [1:11:57<18:12, 2.03it/s]
|
734 |
79%|ββββββββ | 8467/10682 [1:11:57<18:11, 2.03it/s]
|
735 |
79%|ββββββββ | 8468/10682 [1:11:58<18:10, 2.03it/s]
|
736 |
79%|ββββββββ | 8469/10682 [1:11:58<18:10, 2.03it/s]
|
737 |
79%|ββββββββ | 8470/10682 [1:11:59<18:08, 2.03it/s]
|
738 |
79%|ββββββββ | 8471/10682 [1:11:59<18:08, 2.03it/s]
|
739 |
79%|ββββββββ | 8472/10682 [1:12:00<18:06, 2.03it/s]
|
740 |
79%|ββββββββ | 8473/10682 [1:12:00<18:06, 2.03it/s]
|
741 |
79%|ββββββββ | 8474/10682 [1:12:01<18:06, 2.03it/s]
|
742 |
79%|ββββββββ | 8475/10682 [1:12:01<18:05, 2.03it/s]
|
743 |
{'loss': 2.819, 'grad_norm': 0.2613165080547333, 'learning_rate': 0.00012451362249196797, 'epoch': 11.11}
|
744 |
+
|
745 |
79%|ββββββββ | 8475/10682 [1:12:01<18:05, 2.03it/s]
|
746 |
79%|ββββββββ | 8476/10682 [1:12:02<18:06, 2.03it/s]
|
747 |
79%|ββββββββ | 8477/10682 [1:12:02<18:05, 2.03it/s]
|
748 |
79%|ββββββββ | 8478/10682 [1:12:03<18:05, 2.03it/s]
|
749 |
79%|ββββββββ | 8479/10682 [1:12:03<18:05, 2.03it/s]
|
750 |
79%|ββββββββ | 8480/10682 [1:12:04<18:04, 2.03it/s]
|
751 |
79%|ββββββββ | 8481/10682 [1:12:04<18:04, 2.03it/s]
|
752 |
79%|ββββββββ | 8482/10682 [1:12:05<18:02, 2.03it/s]
|
753 |
79%|ββββββββ | 8483/10682 [1:12:05<18:02, 2.03it/s]
|
754 |
79%|ββββββββ | 8484/10682 [1:12:06<18:01, 2.03it/s]
|
755 |
79%|ββββββββ | 8485/10682 [1:12:06<18:00, 2.03it/s]
|
756 |
79%|ββββββββ | 8486/10682 [1:12:07<18:00, 2.03it/s]
|
757 |
79%|ββββββββ | 8487/10682 [1:12:07<18:00, 2.03it/s]
|
758 |
79%|ββββββββ | 8488/10682 [1:12:08<18:00, 2.03it/s]
|
759 |
79%|ββββββββ | 8489/10682 [1:12:08<18:00, 2.03it/s]
|
760 |
79%|ββββββββ | 8490/10682 [1:12:09<17:59, 2.03it/s]
|
761 |
79%|ββββββββ | 8491/10682 [1:12:09<18:00, 2.03it/s]
|
762 |
79%|ββββββββ | 8492/10682 [1:12:10<17:59, 2.03it/s]
|
763 |
80%|ββββββββ | 8493/10682 [1:12:10<17:59, 2.03it/s]
|
764 |
80%|ββββββββ | 8494/10682 [1:12:11<17:57, 2.03it/s]
|
765 |
80%|ββββββββ | 8495/10682 [1:12:11<17:57, 2.03it/s]
|
766 |
80%|ββββββββ | 8496/10682 [1:12:12<17:56, 2.03it/s]
|
767 |
80%|ββββββββ | 8497/10682 [1:12:12<17:56, 2.03it/s]
|
768 |
80%|ββββββββ | 8498/10682 [1:12:13<17:55, 2.03it/s]
|
769 |
80%|ββββββββ | 8499/10682 [1:12:13<17:55, 2.03it/s]
|
770 |
80%|ββββββββ | 8500/10682 [1:12:13<17:53, 2.03it/s]
|
771 |
|
772 |
+
|
773 |
80%|ββββββββ | 8500/10682 [1:12:13<17:53, 2.03it/s]
|
774 |
80%|ββββββββ | 8501/10682 [1:12:14<17:56, 2.03it/s]
|
775 |
80%|ββββββββ | 8502/10682 [1:12:14<17:54, 2.03it/s]
|
776 |
80%|ββββββββ | 8503/10682 [1:12:15<17:53, 2.03it/s]
|
777 |
80%|ββββββββ | 8504/10682 [1:12:15<17:54, 2.03it/s]
|
778 |
80%|ββββββββ | 8505/10682 [1:12:16<17:52, 2.03it/s]
|
779 |
80%|ββββββββ | 8506/10682 [1:12:16<17:51, 2.03it/s]
|
780 |
80%|ββββββββ | 8507/10682 [1:12:17<17:51, 2.03it/s]
|
781 |
80%|ββββββββ | 8508/10682 [1:12:17<17:49, 2.03it/s]
|
782 |
80%|ββββββββ | 8509/10682 [1:12:18<17:50, 2.03it/s]
|
783 |
80%|ββββββββ | 8510/10682 [1:12:18<17:48, 2.03it/s]
|
784 |
80%|ββββββββ | 8511/10682 [1:12:19<17:48, 2.03it/s]
|
785 |
80%|ββββββββ | 8512/10682 [1:12:19<17:47, 2.03it/s]
|
786 |
80%|ββββββββ | 8513/10682 [1:12:20<17:47, 2.03it/s]
|
787 |
80%|ββββββββ | 8514/10682 [1:12:20<17:46, 2.03it/s]
|
788 |
80%|ββββββββ | 8515/10682 [1:12:21<17:46, 2.03it/s]
|
789 |
80%|ββββββββ | 8516/10682 [1:12:21<17:46, 2.03it/s]
|
790 |
80%|ββββββββ | 8517/10682 [1:12:22<17:45, 2.03it/s]
|
791 |
80%|ββββββββ | 8518/10682 [1:12:22<17:44, 2.03it/s]
|
792 |
80%|ββββββββ | 8519/10682 [1:12:23<17:44, 2.03it/s]
|
793 |
80%|ββββββββ | 8520/10682 [1:12:23<17:43, 2.03it/s]
|
794 |
80%|ββββββββ | 8521/10682 [1:12:24<17:43, 2.03it/s]
|
795 |
80%|ββββββββ | 8522/10682 [1:12:24<17:43, 2.03it/s]
|
796 |
80%|ββββββββ | 8523/10682 [1:12:25<17:41, 2.03it/s]
|
797 |
80%|ββββββββ | 8524/10682 [1:12:25<17:41, 2.03it/s]
|
798 |
80%|ββββββββ | 8525/10682 [1:12:26<17:41, 2.03it/s]
|
799 |
|
800 |
+
|
801 |
80%|ββββββββ | 8525/10682 [1:12:26<17:41, 2.03it/s]
|
802 |
80%|ββββββββ | 8526/10682 [1:12:26<17:42, 2.03it/s]
|
803 |
80%|ββββββββ | 8527/10682 [1:12:27<17:42, 2.03it/s]
|
804 |
80%|ββββββββ | 8528/10682 [1:12:27<17:40, 2.03it/s]
|
805 |
80%|ββββββββ | 8529/10682 [1:12:28<17:40, 2.03it/s]
|
806 |
80%|ββββββββ | 8530/10682 [1:12:28<17:39, 2.03it/s]
|
807 |
80%|ββββββββ | 8531/10682 [1:12:29<17:38, 2.03it/s]
|
808 |
80%|ββββββββ | 8532/10682 [1:12:29<17:39, 2.03it/s]
|
809 |
80%|ββββββββ | 8533/10682 [1:12:30<17:37, 2.03it/s]
|
810 |
80%|ββββββββ | 8534/10682 [1:12:30<17:36, 2.03it/s]
|
811 |
80%|ββββββββ | 8535/10682 [1:12:31<17:36, 2.03it/s]
|
812 |
80%|ββββββββ | 8536/10682 [1:12:31<17:36, 2.03it/s]
|
813 |
80%|ββββββββ | 8537/10682 [1:12:32<17:35, 2.03it/s]
|
814 |
80%|ββββββββ | 8538/10682 [1:12:32<17:35, 2.03it/s]
|
815 |
80%|ββββββββ | 8539/10682 [1:12:33<17:34, 2.03it/s]
|
816 |
80%|ββββββββ | 8540/10682 [1:12:33<17:34, 2.03it/s]
|
817 |
80%|ββββββββ | 8541/10682 [1:12:34<17:32, 2.03it/s]
|
818 |
80%|ββββββββ | 8542/10682 [1:12:34<17:32, 2.03it/s]
|
819 |
80%|ββββββββ | 8543/10682 [1:12:35<17:32, 2.03it/s]
|
820 |
80%|ββββββββ | 8544/10682 [1:12:35<17:31, 2.03it/s]
|
821 |
80%|ββββββββ | 8545/10682 [1:12:36<17:32, 2.03it/s]
|
822 |
80%|ββββββββ | 8546/10682 [1:12:36<17:31, 2.03it/s]
|
823 |
80%|ββββββββ | 8547/10682 [1:12:37<17:31, 2.03it/s]
|
824 |
80%|ββββββββ | 8548/10682 [1:12:37<17:31, 2.03it/s]
|
825 |
80%|ββββββββ | 8549/10682 [1:12:38<17:29, 2.03it/s]
|
826 |
80%|ββββββββ | 8550/10682 [1:12:38<17:31, 2.03it/s]{'loss': 2.8274, 'grad_norm': 0.271226167678833, 'learning_rate': 0.0001165346678316832, 'epoch': 11.21}
|
827 |
+
|
828 |
|
829 |
80%|ββββββββ | 8550/10682 [1:12:38<17:31, 2.03it/s]
|
830 |
80%|ββββββββ | 8551/10682 [1:12:39<17:32, 2.03it/s]
|
831 |
80%|ββββββββ | 8552/10682 [1:12:39<17:31, 2.03it/s]
|
832 |
80%|ββββββββ | 8553/10682 [1:12:40<17:30, 2.03it/s]
|
833 |
80%|ββββββββ | 8554/10682 [1:12:40<17:30, 2.03it/s]
|
834 |
80%|ββββββββ | 8555/10682 [1:12:41<17:29, 2.03it/s]
|
835 |
80%|ββββββββ | 8556/10682 [1:12:41<17:28, 2.03it/s]
|
836 |
80%|ββββββββ | 8557/10682 [1:12:42<17:27, 2.03it/s]
|
837 |
80%|ββββββββ | 8558/10682 [1:12:42<17:26, 2.03it/s]
|
838 |
80%|ββββββββ | 8559/10682 [1:12:43<17:27, 2.03it/s]
|
839 |
80%|ββββββββ | 8560/10682 [1:12:43<17:25, 2.03it/s]
|
840 |
80%|ββββββββ | 8561/10682 [1:12:44<17:25, 2.03it/s]
|
841 |
80%|ββββββββ | 8562/10682 [1:12:44<17:25, 2.03it/s]
|
842 |
80%|ββββββββ | 8563/10682 [1:12:45<17:23, 2.03it/s]
|
843 |
80%|ββββββββ | 8564/10682 [1:12:45<17:23, 2.03it/s]
|
844 |
80%|ββββββββ | 8565/10682 [1:12:46<17:22, 2.03it/s]
|
845 |
80%|ββββββββ | 8566/10682 [1:12:46<17:21, 2.03it/s]
|
846 |
80%|ββββββββ | 8567/10682 [1:12:46<17:21, 2.03it/s]
|
847 |
80%|ββββββββ | 8568/10682 [1:12:47<17:20, 2.03it/s]
|
848 |
80%|ββββββββ | 8569/10682 [1:12:47<17:20, 2.03it/s]
|
849 |
80%|ββββββββ | 8570/10682 [1:12:48<17:19, 2.03it/s]
|
850 |
80%|ββββββββ | 8571/10682 [1:12:48<17:18, 2.03it/s]
|
851 |
80%|ββββββββ | 8572/10682 [1:12:49<17:18, 2.03it/s]
|
852 |
80%|ββββββββ | 8573/10682 [1:12:49<17:17, 2.03it/s]
|
853 |
80%|ββββββββ | 8574/10682 [1:12:50<17:16, 2.03it/s]
|
854 |
80%|ββββββββ | 8575/10682 [1:12:50<17:16, 2.03it/s]{'loss': 2.8339, 'grad_norm': 0.2710798978805542, 'learning_rate': 0.00011392597484393285, 'epoch': 11.24}
|
855 |
|
856 |
+
|
857 |
80%|ββββββββ | 8575/10682 [1:12:50<17:16, 2.03it/s]
|
858 |
80%|ββββββββ | 8576/10682 [1:12:51<17:16, 2.03it/s]
|
859 |
80%|ββββββββ | 8577/10682 [1:12:51<17:16, 2.03it/s]
|
860 |
80%|ββββββββ | 8578/10682 [1:12:52<17:16, 2.03it/s]
|
861 |
80%|ββββββββ | 8579/10682 [1:12:52<17:15, 2.03it/s]
|
862 |
80%|ββββββββ | 8580/10682 [1:12:53<17:15, 2.03it/s]
|
863 |
80%|ββββββββ | 8581/10682 [1:12:53<17:13, 2.03it/s]
|
864 |
80%|ββββββββ | 8582/10682 [1:12:54<17:13, 2.03it/s]
|
865 |
80%|ββββββββ | 8583/10682 [1:12:54<17:13, 2.03it/s]
|
866 |
80%|ββββββββ | 8584/10682 [1:12:55<17:12, 2.03it/s]
|
867 |
80%|ββββββββ | 8585/10682 [1:12:55<17:13, 2.03it/s]
|
868 |
80%|ββββββββ | 8586/10682 [1:12:56<17:12, 2.03it/s]
|
869 |
80%|ββββββββ | 8587/10682 [1:12:56<17:10, 2.03it/s]
|
870 |
80%|ββββββββ | 8588/10682 [1:12:57<17:11, 2.03it/s]
|
871 |
80%|ββββββββ | 8589/10682 [1:12:57<17:09, 2.03it/s]
|
872 |
80%|ββββββββ | 8590/10682 [1:12:58<17:09, 2.03it/s]
|
873 |
80%|ββββββββ | 8591/10682 [1:12:58<17:10, 2.03it/s]
|
874 |
80%|ββββββββ | 8592/10682 [1:12:59<17:08, 2.03it/s]
|
875 |
80%|ββββββββ | 8593/10682 [1:12:59<17:09, 2.03it/s]
|
876 |
80%|ββββββββ | 8594/10682 [1:13:00<17:08, 2.03it/s]
|
877 |
80%|ββββββββ | 8595/10682 [1:13:00<17:08, 2.03it/s]
|
878 |
80%|ββββββββ | 8596/10682 [1:13:01<17:08, 2.03it/s]
|
879 |
80%|ββββββββ | 8597/10682 [1:13:01<17:06, 2.03it/s]
|
880 |
80%|ββββββββ | 8598/10682 [1:13:02<17:08, 2.03it/s]
|
881 |
80%|ββββββββ | 8599/10682 [1:13:02<17:06, 2.03it/s]
|
882 |
81%|ββββββββ | 8600/10682 [1:13:03<17:04, 2.03it/s]
|
883 |
|
884 |
+
|
885 |
81%|ββββββββ | 8600/10682 [1:13:03<17:04, 2.03it/s]
|
886 |
81%|ββββββββ | 8601/10682 [1:13:03<17:06, 2.03it/s]
|
887 |
81%|ββββββββ | 8602/10682 [1:13:04<17:05, 2.03it/s]
|
888 |
81%|ββββββββ | 8603/10682 [1:13:04<17:05, 2.03it/s]
|
889 |
81%|ββββββββ | 8604/10682 [1:13:05<17:04, 2.03it/s]
|
890 |
81%|ββββββββ | 8605/10682 [1:13:05<17:04, 2.03it/s]
|
891 |
81%|ββββββββ | 8606/10682 [1:13:06<17:03, 2.03it/s]
|
892 |
81%|ββββββββ | 8607/10682 [1:13:06<17:03, 2.03it/s]
|
893 |
81%|ββββββββ | 8608/10682 [1:13:07<17:02, 2.03it/s]
|
894 |
81%|ββββββββ | 8609/10682 [1:13:07<17:01, 2.03it/s]
|
895 |
81%|ββββββββ | 8610/10682 [1:13:08<17:02, 2.03it/s]
|
896 |
81%|ββββββββ | 8611/10682 [1:13:08<17:01, 2.03it/s]
|
897 |
81%|ββββββββ | 8612/10682 [1:13:09<17:01, 2.03it/s]
|
898 |
81%|ββββββββ | 8613/10682 [1:13:09<16:59, 2.03it/s]
|
899 |
81%|ββββββββ | 8614/10682 [1:13:10<16:59, 2.03it/s]
|
900 |
81%|ββββββββ | 8615/10682 [1:13:10<16:58, 2.03it/s]
|
901 |
81%|ββββββββ | 8616/10682 [1:13:11<16:57, 2.03it/s]
|
902 |
81%|ββββββββ | 8617/10682 [1:13:11<16:58, 2.03it/s]
|
903 |
81%|ββββββββ | 8618/10682 [1:13:12<16:56, 2.03it/s]
|
904 |
81%|ββββββββ | 8619/10682 [1:13:12<16:55, 2.03it/s]
|
905 |
81%|ββββββββ | 8620/10682 [1:13:13<16:55, 2.03it/s]
|
906 |
81%|ββββββββ | 8621/10682 [1:13:13<16:54, 2.03it/s]
|
907 |
81%|ββββββββ | 8622/10682 [1:13:14<16:54, 2.03it/s]
|
908 |
81%|ββββββββ | 8623/10682 [1:13:14<16:54, 2.03it/s]
|
909 |
81%|ββββββββ | 8624/10682 [1:13:15<16:53, 2.03it/s]
|
910 |
81%|ββββββββ | 8625/10682 [1:13:15<16:53, 2.03it/s]
|
911 |
|
912 |
+
|
913 |
81%|ββββββββ | 8625/10682 [1:13:15<16:53, 2.03it/s]
|
914 |
81%|ββββββββ | 8626/10682 [1:13:16<16:53, 2.03it/s]
|
915 |
81%|ββββββββ | 8627/10682 [1:13:16<16:53, 2.03it/s]
|
916 |
81%|ββββββββ | 8628/10682 [1:13:17<16:52, 2.03it/s]
|
917 |
81%|ββββββββ | 8629/10682 [1:13:17<16:52, 2.03it/s]
|
918 |
81%|ββββββββ | 8630/10682 [1:13:18<16:50, 2.03it/s]
|
919 |
81%|ββββββββ | 8631/10682 [1:13:18<16:50, 2.03it/s]
|
920 |
81%|ββββββββ | 8632/10682 [1:13:19<16:50, 2.03it/s]
|
921 |
81%|ββββββββ | 8633/10682 [1:13:19<16:49, 2.03it/s]
|
922 |
81%|ββββββββ | 8634/10682 [1:13:19<16:48, 2.03it/s]
|
923 |
81%|ββββββββ | 8635/10682 [1:13:20<16:48, 2.03it/s]
|
924 |
81%|ββββββββ | 8636/10682 [1:13:20<16:47, 2.03it/s]
|
925 |
81%|ββββββββ | 8637/10682 [1:13:21<16:47, 2.03it/s]
|
926 |
81%|ββββββββ | 8638/10682 [1:13:21<16:46, 2.03it/s]
|
927 |
81%|ββββββββ | 8639/10682 [1:13:22<16:45, 2.03it/s]
|
928 |
81%|ββββββββ | 8640/10682 [1:13:22<16:45, 2.03it/s]
|
929 |
81%|ββββββββ | 8641/10682 [1:13:23<16:44, 2.03it/s]
|
930 |
81%|ββββββββ | 8642/10682 [1:13:23<16:44, 2.03it/s]
|
931 |
81%|ββββββββ | 8643/10682 [1:13:24<16:43, 2.03it/s]
|
932 |
81%|ββββββββ | 8644/10682 [1:13:24<16:43, 2.03it/s]
|
933 |
81%|ββββββββ | 8645/10682 [1:13:25<16:44, 2.03it/s]
|
934 |
81%|ββββββββ | 8646/10682 [1:13:25<16:43, 2.03it/s]
|
935 |
81%|ββββββββ | 8647/10682 [1:13:26<16:44, 2.03it/s]
|
936 |
81%|ββββββββ | 8648/10682 [1:13:26<16:43, 2.03it/s]
|
937 |
81%|ββββββββ | 8649/10682 [1:13:27<16:42, 2.03it/s]
|
938 |
81%|ββββββββ | 8650/10682 [1:13:27<16:42, 2.03it/s]
|
939 |
{'loss': 2.8442, 'grad_norm': 0.2641572654247284, 'learning_rate': 0.00010625520936593375, 'epoch': 11.34}
|
940 |
+
|
941 |
81%|ββββββββ | 8650/10682 [1:13:27<16:42, 2.03it/s]
|
942 |
81%|ββββββββ | 8651/10682 [1:13:28<16:42, 2.03it/s]
|
943 |
81%|ββββββββ | 8652/10682 [1:13:28<16:41, 2.03it/s]
|
944 |
81%|ββββββββ | 8653/10682 [1:13:29<16:41, 2.03it/s]
|
945 |
81%|ββββββββ | 8654/10682 [1:13:29<16:40, 2.03it/s]
|
946 |
81%|ββββββββ | 8655/10682 [1:13:30<16:39, 2.03it/s]
|
947 |
81%|ββββββββ | 8656/10682 [1:13:30<16:38, 2.03it/s]
|
948 |
81%|ββββββββ | 8657/10682 [1:13:31<16:37, 2.03it/s]
|
949 |
81%|ββββββββ | 8658/10682 [1:13:31<16:36, 2.03it/s]
|
950 |
81%|ββββββββ | 8659/10682 [1:13:32<16:35, 2.03it/s]
|
951 |
81%|ββββββββ | 8660/10682 [1:13:32<16:35, 2.03it/s]
|
952 |
81%|ββββββββ | 8661/10682 [1:13:33<16:35, 2.03it/s]
|
953 |
81%|ββββββββ | 8662/10682 [1:13:33<16:34, 2.03it/s]
|
954 |
81%|ββββββββ | 8663/10682 [1:13:34<16:33, 2.03it/s]
|
955 |
81%|ββββββββ | 8664/10682 [1:13:34<16:33, 2.03it/s]
|
956 |
81%|ββββββββ | 8665/10682 [1:13:35<16:33, 2.03it/s]
|
957 |
81%|ββββββββ | 8666/10682 [1:13:35<16:33, 2.03it/s]
|
958 |
81%|ββββββββ | 8667/10682 [1:13:36<16:32, 2.03it/s]
|
959 |
81%|ββββββββ | 8668/10682 [1:13:36<16:31, 2.03it/s]
|
960 |
81%|ββββββββ | 8669/10682 [1:13:37<16:31, 2.03it/s]
|
961 |
81%|ββββββββ | 8670/10682 [1:13:37<16:31, 2.03it/s]
|
962 |
81%|ββββββββ | 8671/10682 [1:13:38<16:31, 2.03it/s]
|
963 |
81%|ββββββββ | 8672/10682 [1:13:38<16:30, 2.03it/s]
|
964 |
81%|ββββββββ | 8673/10682 [1:13:39<16:28, 2.03it/s]
|
965 |
81%|ββββββββ | 8674/10682 [1:13:39<16:29, 2.03it/s]
|
966 |
81%|ββββββββ | 8675/10682 [1:13:40<16:28, 2.03it/s]{'loss': 2.8423, 'grad_norm': 0.2601149380207062, 'learning_rate': 0.0001037506276287885, 'epoch': 11.37}
|
967 |
+
|
968 |
|
969 |
81%|ββββββββ | 8675/10682 [1:13:40<16:28, 2.03it/s]
|
970 |
81%|ββββββββ | 8676/10682 [1:13:40<16:29, 2.03it/s]
|
971 |
81%|ββββββββ | 8677/10682 [1:13:41<16:28, 2.03it/s]
|
972 |
81%|ββββββββ | 8678/10682 [1:13:41<16:28, 2.03it/s]
|
973 |
81%|ββββββββ | 8679/10682 [1:13:42<16:27, 2.03it/s]
|
974 |
81%|βββββββββ | 8680/10682 [1:13:42<16:26, 2.03it/s]
|
975 |
81%|βββββββββ | 8681/10682 [1:13:43<16:26, 2.03it/s]
|
976 |
81%|βββββββββ | 8682/10682 [1:13:43<16:24, 2.03it/s]
|
977 |
81%|βββββββββ | 8683/10682 [1:13:44<16:24, 2.03it/s]
|
978 |
81%|βββββββββ | 8684/10682 [1:13:44<16:23, 2.03it/s]
|
979 |
81%|βββββββββ | 8685/10682 [1:13:45<16:23, 2.03it/s]
|
980 |
81%|βββββββββ | 8686/10682 [1:13:45<16:23, 2.03it/s]
|
981 |
81%|βββββββββ | 8687/10682 [1:13:46<16:21, 2.03it/s]
|
982 |
81%|βββββββββ | 8688/10682 [1:13:46<16:21, 2.03it/s]
|
983 |
81%|βββββββββ | 8689/10682 [1:13:47<16:21, 2.03it/s]
|
984 |
81%|βββββββββ | 8690/10682 [1:13:47<16:19, 2.03it/s]
|
985 |
81%|βββββββββ | 8691/10682 [1:13:48<16:19, 2.03it/s]
|
986 |
81%|βββββββββ | 8692/10682 [1:13:48<16:18, 2.03it/s]
|
987 |
81%|βββββββββ | 8693/10682 [1:13:49<16:18, 2.03it/s]
|
988 |
81%|βββββββββ | 8694/10682 [1:13:49<16:18, 2.03it/s]
|
989 |
81%|βββββββββ | 8695/10682 [1:13:50<16:17, 2.03it/s]
|
990 |
81%|βββββββββ | 8696/10682 [1:13:50<16:18, 2.03it/s]
|
991 |
81%|βββββββββ | 8697/10682 [1:13:51<16:17, 2.03it/s]
|
992 |
81%|βββββββββ | 8698/10682 [1:13:51<16:16, 2.03it/s]
|
993 |
81%|βββββββββ | 8699/10682 [1:13:52<16:16, 2.03it/s]
|
994 |
81%|βββββββββ | 8700/10682 [1:13:52<16:15, 2.03it/s]{'loss': 2.8562, 'grad_norm': 0.2651384770870209, 'learning_rate': 0.0001012724960361826, 'epoch': 11.4}
|
995 |
+
|
996 |
|
997 |
81%|βββββββββ | 8700/10682 [1:13:52<16:15, 2.03it/s]
|
998 |
81%|βββββββββ | 8701/10682 [1:13:53<16:16, 2.03it/s]
|
999 |
81%|βββββββββ | 8702/10682 [1:13:53<16:14, 2.03it/s]
|
1000 |
81%|βββββββββ | 8703/10682 [1:13:53<16:14, 2.03it/s]
|
1001 |
81%|βββββββββ | 8704/10682 [1:13:54<16:14, 2.03it/s]
|
1002 |
81%|βββββββββ | 8705/10682 [1:13:54<16:13, 2.03it/s]
|
1003 |
82%|βββββββββ | 8706/10682 [1:13:55<16:13, 2.03it/s]
|
1004 |
82%|βββββββββ | 8707/10682 [1:13:55<16:12, 2.03it/s]
|
1005 |
82%|βββββββββ | 8708/10682 [1:13:56<16:10, 2.03it/s]
|
1006 |
82%|βββββββββ | 8709/10682 [1:13:56<16:11, 2.03it/s]
|
1007 |
82%|βββββββββ | 8710/10682 [1:13:57<16:10, 2.03it/s]
|
1008 |
82%|βββββββββ | 8711/10682 [1:13:57<16:10, 2.03it/s]
|
1009 |
82%|βββββββββ | 8712/10682 [1:13:58<16:09, 2.03it/s]
|
1010 |
82%|βββββββββ | 8713/10682 [1:13:58<16:09, 2.03it/s]
|
1011 |
82%|βββββββββ | 8714/10682 [1:13:59<16:09, 2.03it/s]
|
1012 |
82%|βββββββββ | 8715/10682 [1:13:59<16:08, 2.03it/s]
|
1013 |
82%|βββββββββ | 8716/10682 [1:14:00<16:08, 2.03it/s]
|
1014 |
82%|βββββββββ | 8717/10682 [1:14:00<16:08, 2.03it/s]
|
1015 |
82%|βββββββββ | 8718/10682 [1:14:01<16:06, 2.03it/s]
|
1016 |
82%|βββββββββ | 8719/10682 [1:14:01<16:06, 2.03it/s]
|
1017 |
82%|βββββββββ | 8720/10682 [1:14:02<16:05, 2.03it/s]
|
1018 |
82%|βββββββββ | 8721/10682 [1:14:02<16:04, 2.03it/s]
|
1019 |
82%|βββββββββ | 8722/10682 [1:14:03<16:05, 2.03it/s]
|
1020 |
82%|βββββββββ | 8723/10682 [1:14:03<16:04, 2.03it/s]
|
1021 |
82%|βββββββββ | 8724/10682 [1:14:04<16:04, 2.03it/s]
|
1022 |
82%|βββββββββ | 8725/10682 [1:14:04<16:03, 2.03it/s]{'loss': 2.8428, 'grad_norm': 0.26036229729652405, 'learning_rate': 9.882098000652034e-05, 'epoch': 11.44}
|
1023 |
+
|
1024 |
|
1025 |
82%|βββββββββ | 8725/10682 [1:14:04<16:03, 2.03it/s]
|
1026 |
82%|βββββββββ | 8726/10682 [1:14:05<16:04, 2.03it/s]
|
1027 |
82%|βββββββββ | 8727/10682 [1:14:05<16:03, 2.03it/s]
|
1028 |
82%|βββββββββ | 8728/10682 [1:14:06<16:02, 2.03it/s]
|
1029 |
82%|βββββββββ | 8729/10682 [1:14:06<16:03, 2.03it/s]
|
1030 |
82%|βββββββββ | 8730/10682 [1:14:07<16:02, 2.03it/s]
|
1031 |
82%|βββββββββ | 8731/10682 [1:14:07<16:01, 2.03it/s]
|
1032 |
82%|βββββββββ | 8732/10682 [1:14:08<16:01, 2.03it/s]
|
1033 |
82%|βββββββββ | 8733/10682 [1:14:08<16:00, 2.03it/s]
|
1034 |
82%|βββββββββ | 8734/10682 [1:14:09<16:00, 2.03it/s]
|
1035 |
82%|βββββββββ | 8735/10682 [1:14:09<15:59, 2.03it/s]
|
1036 |
82%|βββββββββ | 8736/10682 [1:14:10<15:59, 2.03it/s]
|
1037 |
82%|βββββββββ | 8737/10682 [1:14:10<15:58, 2.03it/s]
|
1038 |
82%|βββββββββ | 8738/10682 [1:14:11<15:57, 2.03it/s]
|
1039 |
82%|βββββββββ | 8739/10682 [1:14:11<15:57, 2.03it/s]
|
1040 |
82%|βββββββββ | 8740/10682 [1:14:12<15:56, 2.03it/s]
|
1041 |
82%|βββββββββ | 8741/10682 [1:14:12<15:55, 2.03it/s]
|
1042 |
82%|βββββββββ | 8742/10682 [1:14:13<15:55, 2.03it/s]
|
1043 |
82%|βββββββββ | 8743/10682 [1:14:13<15:55, 2.03it/s]
|
1044 |
82%|βββββββββ | 8744/10682 [1:14:14<15:55, 2.03it/s]
|
1045 |
82%|βββββββββ | 8745/10682 [1:14:14<15:53, 2.03it/s]
|
1046 |
82%|βββββββββ | 8746/10682 [1:14:15<15:53, 2.03it/s]
|
1047 |
82%|βββββββββ | 8747/10682 [1:14:15<15:53, 2.03it/s]
|
1048 |
82%|βββββββββ | 8748/10682 [1:14:16<15:52, 2.03it/s]
|
1049 |
82%|βββββββββ | 8749/10682 [1:14:16<15:52, 2.03it/s]
|
1050 |
82%|βββββββββ | 8750/10682 [1:14:17<15:51, 2.03it/s]{'loss': 2.8425, 'grad_norm': 0.2622228264808655, 'learning_rate': 9.639624318158335e-05, 'epoch': 11.47}
|
1051 |
+
|
1052 |
|
1053 |
82%|βββββββββ | 8750/10682 [1:14:17<15:51, 2.03it/s]
|
1054 |
82%|ββββββββοΏ½οΏ½ | 8751/10682 [1:14:17<15:52, 2.03it/s]
|
1055 |
82%|βββββββββ | 8752/10682 [1:14:18<15:51, 2.03it/s]
|
1056 |
82%|βββββββββ | 8753/10682 [1:14:18<15:50, 2.03it/s]
|
1057 |
82%|βββββββββ | 8754/10682 [1:14:19<15:49, 2.03it/s]
|
1058 |
82%|βββββββββ | 8755/10682 [1:14:19<15:48, 2.03it/s]
|
1059 |
82%|βββββββββ | 8756/10682 [1:14:20<15:48, 2.03it/s]
|
1060 |
82%|βββββββββ | 8757/10682 [1:14:20<15:47, 2.03it/s]
|
1061 |
82%|βββββββββ | 8758/10682 [1:14:21<15:46, 2.03it/s]
|
1062 |
82%|βββββββββ | 8759/10682 [1:14:21<15:45, 2.03it/s]
|
1063 |
82%|βββββββββ | 8760/10682 [1:14:22<15:45, 2.03it/s]
|
1064 |
82%|βββββββββ | 8761/10682 [1:14:22<15:44, 2.03it/s]
|
1065 |
82%|βββββββββ | 8762/10682 [1:14:23<15:45, 2.03it/s]
|
1066 |
82%|βββββββββ | 8763/10682 [1:14:23<15:44, 2.03it/s]
|
1067 |
82%|βββββββββ | 8764/10682 [1:14:24<15:44, 2.03it/s]
|
1068 |
82%|βββββββββ | 8765/10682 [1:14:24<15:44, 2.03it/s]
|
1069 |
82%|βββββββββ | 8766/10682 [1:14:25<15:43, 2.03it/s]
|
1070 |
82%|βββββββββ | 8767/10682 [1:14:25<15:43, 2.03it/s]
|
1071 |
82%|βββββββββ | 8768/10682 [1:14:25<15:42, 2.03it/s]
|
1072 |
82%|βββββββββ | 8769/10682 [1:14:26<15:40, 2.03it/s]
|
1073 |
82%|βββββββββ | 8770/10682 [1:14:26<15:41, 2.03it/s]
|
1074 |
82%|βββββββββ | 8771/10682 [1:14:27<15:40, 2.03it/s]
|
1075 |
82%|βββββββββ | 8772/10682 [1:14:27<15:40, 2.03it/s]
|
1076 |
82%|βββββββββ | 8773/10682 [1:14:28<15:40, 2.03it/s]
|
1077 |
82%|βββββββββ | 8774/10682 [1:14:28<15:38, 2.03it/s]
|
1078 |
82%|βββββββββ | 8775/10682 [1:14:29<15:39, 2.03it/s]{'loss': 2.8524, 'grad_norm': 0.26476430892944336, 'learning_rate': 9.399844741560781e-05, 'epoch': 11.5}
|
1079 |
|
1080 |
+
|
1081 |
82%|βββββββββ | 8775/10682 [1:14:29<15:39, 2.03it/s]
|
1082 |
82%|βββββββββ | 8776/10682 [1:14:29<15:40, 2.03it/s]
|
1083 |
82%|βββββββββ | 8777/10682 [1:14:30<15:41, 2.02it/s]
|
1084 |
82%|βββββββββ | 8778/10682 [1:14:30<15:38, 2.03it/s]
|
1085 |
82%|βββββββββ | 8779/10682 [1:14:31<15:38, 2.03it/s]
|
1086 |
82%|βββββββββ | 8780/10682 [1:14:31<15:38, 2.03it/s]
|
1087 |
82%|βββββββββ | 8781/10682 [1:14:32<15:36, 2.03it/s]
|
1088 |
82%|βββββββββ | 8782/10682 [1:14:32<15:36, 2.03it/s]
|
1089 |
82%|βββββββββ | 8783/10682 [1:14:33<15:35, 2.03it/s]
|
1090 |
82%|βββββββββ | 8784/10682 [1:14:33<15:33, 2.03it/s]
|
1091 |
82%|βββββββββ | 8785/10682 [1:14:34<15:34, 2.03it/s]
|
1092 |
82%|βββββββββ | 8786/10682 [1:14:34<15:33, 2.03it/s]
|
1093 |
82%|βββββββββ | 8787/10682 [1:14:35<15:33, 2.03it/s]
|
1094 |
82%|βββββββββ | 8788/10682 [1:14:35<15:34, 2.03it/s]
|
1095 |
82%|βββββββββ | 8789/10682 [1:14:36<15:33, 2.03it/s]
|
1096 |
82%|βββββββββ | 8790/10682 [1:14:36<15:33, 2.03it/s]
|
1097 |
82%|βββββββββ | 8791/10682 [1:14:37<15:32, 2.03it/s]
|
1098 |
82%|βββββββββ | 8792/10682 [1:14:37<15:31, 2.03it/s]
|
1099 |
82%|βββββββββ | 8793/10682 [1:14:38<15:31, 2.03it/s]
|
1100 |
82%|βββββββββ | 8794/10682 [1:14:38<15:30, 2.03it/s]
|
1101 |
82%|βββββββββ | 8795/10682 [1:14:39<15:29, 2.03it/s]
|
1102 |
82%|βββββββββ | 8796/10682 [1:14:39<15:29, 2.03it/s]
|
1103 |
82%|βββββββββ | 8797/10682 [1:14:40<15:28, 2.03it/s]
|
1104 |
82%|βββββββββ | 8798/10682 [1:14:40<15:26, 2.03it/s]
|
1105 |
82%|βββββββββ | 8799/10682 [1:14:41<15:27, 2.03it/s]
|
1106 |
82%|βββββββββ | 8800/10682 [1:14:41<15:27, 2.03it/s]{'loss': 2.8376, 'grad_norm': 0.25995054841041565, 'learning_rate': 9.162775276448015e-05, 'epoch': 11.53}
|
1107 |
+
|
1108 |
|
1109 |
82%|βββββββββ | 8800/10682 [1:14:41<15:27, 2.03it/s]
|
1110 |
82%|βββββββββ | 8801/10682 [1:14:42<15:28, 2.03it/s]
|
1111 |
82%|βββββββββ | 8802/10682 [1:14:42<15:26, 2.03it/s]
|
1112 |
82%|βββββββββ | 8803/10682 [1:14:43<15:24, 2.03it/s]
|
1113 |
82%|βββββββββ | 8804/10682 [1:14:43<15:23, 2.03it/s]
|
1114 |
82%|βββββββββ | 8805/10682 [1:14:44<15:23, 2.03it/s]
|
1115 |
82%|βββββββββ | 8806/10682 [1:14:44<15:23, 2.03it/s]
|
1116 |
82%|βββββββββ | 8807/10682 [1:14:45<15:22, 2.03it/s]
|
1117 |
82%|βββββββββ | 8808/10682 [1:14:45<15:22, 2.03it/s]
|
1118 |
82%|βββββββββ | 8809/10682 [1:14:46<15:22, 2.03it/s]
|
1119 |
82%|βββββββββ | 8810/10682 [1:14:46<15:21, 2.03it/s]
|
1120 |
82%|βββββββββ | 8811/10682 [1:14:47<15:21, 2.03it/s]
|
1121 |
82%|βββββββββ | 8812/10682 [1:14:47<15:21, 2.03it/s]
|
1122 |
83%|βββββββββ | 8813/10682 [1:14:48<15:20, 2.03it/s]
|
1123 |
83%|βββββββββ | 8814/10682 [1:14:48<15:20, 2.03it/s]
|
1124 |
83%|οΏ½οΏ½οΏ½ββββββββ | 8815/10682 [1:14:49<15:18, 2.03it/s]
|
1125 |
83%|βββββββββ | 8816/10682 [1:14:49<15:18, 2.03it/s]
|
1126 |
83%|βββββββββ | 8817/10682 [1:14:50<15:18, 2.03it/s]
|
1127 |
83%|βββββββββ | 8818/10682 [1:14:50<15:18, 2.03it/s]
|
1128 |
83%|βββββββββ | 8819/10682 [1:14:51<15:18, 2.03it/s]
|
1129 |
83%|βββββββββ | 8820/10682 [1:14:51<15:16, 2.03it/s]
|
1130 |
83%|βββββββββ | 8821/10682 [1:14:52<15:16, 2.03it/s]
|
1131 |
83%|βββββββββ | 8822/10682 [1:14:52<15:16, 2.03it/s]
|
1132 |
83%|βββββββββ | 8823/10682 [1:14:53<15:15, 2.03it/s]
|
1133 |
83%|βββββββββ | 8824/10682 [1:14:53<15:15, 2.03it/s]
|
1134 |
83%|βββββββββ | 8825/10682 [1:14:54<15:14, 2.03it/s]
|
1135 |
|
1136 |
+
|
1137 |
83%|βββββββββ | 8825/10682 [1:14:54<15:14, 2.03it/s]
|
1138 |
83%|βββββββββ | 8826/10682 [1:14:54<15:15, 2.03it/s]
|
1139 |
83%|βββββββββ | 8827/10682 [1:14:55<15:14, 2.03it/s]
|
1140 |
83%|βββββββββ | 8828/10682 [1:14:55<15:13, 2.03it/s]
|
1141 |
83%|βββββββββ | 8829/10682 [1:14:56<16:34, 1.86it/s]
|
1142 |
83%|βββββββββ | 8830/10682 [1:14:56<16:09, 1.91it/s]
|
1143 |
83%|βββββββββ | 8831/10682 [1:14:57<15:51, 1.94it/s]
|
1144 |
83%|βββββββββ | 8832/10682 [1:14:57<15:38, 1.97it/s]
|
1145 |
83%|βββββββββ | 8833/10682 [1:14:58<15:30, 1.99it/s]
|
1146 |
83%|βββββββββ | 8834/10682 [1:14:58<15:23, 2.00it/s]
|
1147 |
83%|βββββββββ | 8835/10682 [1:14:59<15:19, 2.01it/s]
|
1148 |
83%|βββββββββ | 8836/10682 [1:14:59<15:15, 2.02it/s]
|
1149 |
83%|βββββββββ | 8837/10682 [1:15:00<15:13, 2.02it/s]
|
1150 |
83%|βββββββββ | 8838/10682 [1:15:00<15:11, 2.02it/s]
|
1151 |
83%|βββββββββ | 8839/10682 [1:15:01<15:09, 2.03it/s]
|
1152 |
83%|βββββββββ | 8840/10682 [1:15:01<15:09, 2.03it/s]
|
1153 |
83%|βββββββββ | 8841/10682 [1:15:02<15:07, 2.03it/s]
|
1154 |
83%|βββββββββ | 8842/10682 [1:15:02<15:06, 2.03it/s]
|
1155 |
83%|βββββββββ | 8843/10682 [1:15:03<15:06, 2.03it/s]
|
1156 |
83%|βββββββββ | 8844/10682 [1:15:03<15:05, 2.03it/s]
|
1157 |
83%|βββββββββ | 8845/10682 [1:15:04<15:05, 2.03it/s]
|
1158 |
83%|βββββββββ | 8846/10682 [1:15:04<15:04, 2.03it/s]
|
1159 |
83%|βββββββββ | 8847/10682 [1:15:05<15:04, 2.03it/s]
|
1160 |
83%|βββββββββ | 8848/10682 [1:15:05<15:03, 2.03it/s]
|
1161 |
83%|βββββββββ | 8849/10682 [1:15:06<15:02, 2.03it/s]
|
1162 |
83%|βββββββββ | 8850/10682 [1:15:06<15:02, 2.03it/s]{'loss': 2.8552, 'grad_norm': 0.2628968060016632, 'learning_rate': 8.6968297974584e-05, 'epoch': 11.6}
|
1163 |
|
1164 |
+
|
1165 |
83%|βββββββββ | 8850/10682 [1:15:06<15:02, 2.03it/s]
|
1166 |
83%|βββββββββ | 8851/10682 [1:15:07<15:03, 2.03it/s]
|
1167 |
83%|βββββββββ | 8852/10682 [1:15:07<15:02, 2.03it/s]
|
1168 |
83%|βββββββββ | 8853/10682 [1:15:08<15:01, 2.03it/s]
|
1169 |
83%|βββββββββ | 8854/10682 [1:15:08<15:01, 2.03it/s]
|
1170 |
83%|βββββββββ | 8855/10682 [1:15:09<15:01, 2.03it/s]
|
1171 |
83%|βββββββββ | 8856/10682 [1:15:09<15:00, 2.03it/s]
|
1172 |
83%|βββββββββ | 8857/10682 [1:15:09<15:01, 2.03it/s]
|
1173 |
83%|βββββββββ | 8858/10682 [1:15:10<14:59, 2.03it/s]
|
1174 |
83%|βββββββββ | 8859/10682 [1:15:10<14:58, 2.03it/s]
|
1175 |
83%|βββββββββ | 8860/10682 [1:15:11<14:58, 2.03it/s]
|
1176 |
83%|βββββββββ | 8861/10682 [1:15:11<14:57, 2.03it/s]
|
1177 |
83%|βββββββββ | 8862/10682 [1:15:12<14:56, 2.03it/s]
|
1178 |
83%|βββββββββ | 8863/10682 [1:15:12<14:56, 2.03it/s]
|
1179 |
83%|βββββββββ | 8864/10682 [1:15:13<14:55, 2.03it/s]
|
1180 |
83%|βββββββββ | 8865/10682 [1:15:13<14:55, 2.03it/s]
|
1181 |
83%|βββββββββ | 8866/10682 [1:15:14<14:54, 2.03it/s]
|
1182 |
83%|βββββββββ | 8867/10682 [1:15:14<14:53, 2.03it/s]
|
1183 |
83%|βββββββββ | 8868/10682 [1:15:15<14:53, 2.03it/s]
|
1184 |
83%|βββββββββ | 8869/10682 [1:15:15<14:51, 2.03it/s]
|
1185 |
83%|βββββββββ | 8870/10682 [1:15:16<14:52, 2.03it/s]
|
1186 |
83%|βββββββββ | 8871/10682 [1:15:16<14:51, 2.03it/s]
|
1187 |
83%|βββββββββ | 8872/10682 [1:15:17<14:51, 2.03it/s]
|
1188 |
83%|βββββββββ | 8873/10682 [1:15:17<14:50, 2.03it/s]
|
1189 |
83%|βββββββββ | 8874/10682 [1:15:18<14:49, 2.03it/s]
|
1190 |
83%|βββββββββ | 8875/10682 [1:15:18<14:50, 2.03it/s]
|
1191 |
{'loss': 2.8535, 'grad_norm': 0.2602517902851105, 'learning_rate': 8.467984886028967e-05, 'epoch': 11.63}
|
1192 |
+
|
1193 |
83%|βββββββββ | 8875/10682 [1:15:18<14:50, 2.03it/s]
|
1194 |
83%|βββββββββ | 8876/10682 [1:15:19<14:50, 2.03it/s]
|
1195 |
83%|βββββββββ | 8877/10682 [1:15:19<14:50, 2.03it/s]
|
1196 |
83%|βββββββββ | 8878/10682 [1:15:20<14:49, 2.03it/s]
|
1197 |
83%|βββββββββ | 8879/10682 [1:15:20<14:47, 2.03it/s]
|
1198 |
83%|βββββββββ | 8880/10682 [1:15:21<14:47, 2.03it/s]
|
1199 |
83%|βββββββββ | 8881/10682 [1:15:21<14:46, 2.03it/s]
|
1200 |
83%|βββββββββ | 8882/10682 [1:15:22<14:46, 2.03it/s]
|
1201 |
83%|βββββββββ | 8883/10682 [1:15:22<14:45, 2.03it/s]
|
1202 |
83%|βββββββββ | 8884/10682 [1:15:23<14:44, 2.03it/s]
|
1203 |
83%|βββββββββ | 8885/10682 [1:15:23<14:45, 2.03it/s]
|
1204 |
83%|βββββββββ | 8886/10682 [1:15:24<14:43, 2.03it/s]
|
1205 |
83%|βββββββββ | 8887/10682 [1:15:24<14:43, 2.03it/s]
|
1206 |
83%|βββββββββ | 8888/10682 [1:15:25<14:43, 2.03it/s]
|
1207 |
83%|βββββββββ | 8889/10682 [1:15:25<14:42, 2.03it/s]
|
1208 |
83%|βββββββββ | 8890/10682 [1:15:26<14:42, 2.03it/s]
|
1209 |
83%|βββββββββ | 8891/10682 [1:15:26<14:41, 2.03it/s]
|
1210 |
83%|βββββββββ | 8892/10682 [1:15:27<14:41, 2.03it/s]
|
1211 |
83%|βββββββββ | 8893/10682 [1:15:27<14:41, 2.03it/s]
|
1212 |
83%|βββββββββ | 8894/10682 [1:15:28<14:40, 2.03it/s]
|
1213 |
83%|βββββββββ | 8895/10682 [1:15:28<14:40, 2.03it/s]
|
1214 |
83%|βββββββββ | 8896/10682 [1:15:29<14:38, 2.03it/s]
|
1215 |
83%|βββββββββ | 8897/10682 [1:15:29<14:38, 2.03it/s]
|
1216 |
83%|βββββββββ | 8898/10682 [1:15:30<14:38, 2.03it/s]
|
1217 |
83%|βββββββββ | 8899/10682 [1:15:30<14:37, 2.03it/s]
|
1218 |
83%|βββββββββ | 8900/10682 [1:15:31<14:37, 2.03it/s]{'loss': 2.8613, 'grad_norm': 0.264787495136261, 'learning_rate': 8.24191228890303e-05, 'epoch': 11.66}
|
1219 |
+
|
1220 |
|
1221 |
83%|βββββββββ | 8900/10682 [1:15:31<14:37, 2.03it/s]
|
1222 |
83%|βββββββββ | 8901/10682 [1:15:31<14:37, 2.03it/s]
|
1223 |
83%|βββββββββ | 8902/10682 [1:15:32<14:36, 2.03it/s]
|
1224 |
83%|βββββββββ | 8903/10682 [1:15:32<14:35, 2.03it/s]
|
1225 |
83%|βββββββββ | 8904/10682 [1:15:33<14:34, 2.03it/s]
|
1226 |
83%|βββββββββ | 8905/10682 [1:15:33<14:34, 2.03it/s]
|
1227 |
83%|βββββββββ | 8906/10682 [1:15:34<14:34, 2.03it/s]
|
1228 |
83%|βββββββββ | 8907/10682 [1:15:34<14:33, 2.03it/s]
|
1229 |
83%|βββββββββ | 8908/10682 [1:15:35<14:32, 2.03it/s]
|
1230 |
83%|βββββββββ | 8909/10682 [1:15:35<14:32, 2.03it/s]
|
1231 |
83%|βββββββββ | 8910/10682 [1:15:36<14:31, 2.03it/s]
|
1232 |
83%|βββββββββ | 8911/10682 [1:15:36<14:31, 2.03it/s]
|
1233 |
83%|βββββββββ | 8912/10682 [1:15:37<14:30, 2.03it/s]
|
1234 |
83%|βββββββββ | 8913/10682 [1:15:37<14:30, 2.03it/s]
|
1235 |
83%|βββββββββ | 8914/10682 [1:15:38<14:29, 2.03it/s]
|
1236 |
83%|βββββββββ | 8915/10682 [1:15:38<14:29, 2.03it/s]
|
1237 |
83%|βββββββββ | 8916/10682 [1:15:39<14:28, 2.03it/s]
|
1238 |
83%|βββββββββ | 8917/10682 [1:15:39<14:28, 2.03it/s]
|
1239 |
83%|βββββββββ | 8918/10682 [1:15:40<14:26, 2.03it/s]
|
1240 |
83%|βββββββββ | 8919/10682 [1:15:40<14:27, 2.03it/s]
|
1241 |
84%|βββββββββ | 8920/10682 [1:15:41<14:27, 2.03it/s]
|
1242 |
84%|βββββββββ | 8921/10682 [1:15:41<14:26, 2.03it/s]
|
1243 |
84%|βββββββββ | 8922/10682 [1:15:41<14:25, 2.03it/s]
|
1244 |
84%|βββββββββ | 8923/10682 [1:15:42<14:25, 2.03it/s]
|
1245 |
84%|βββββββββ | 8924/10682 [1:15:42<14:24, 2.03it/s]
|
1246 |
84%|βββββββββ | 8925/10682 [1:15:43<14:24, 2.03it/s]{'loss': 2.8574, 'grad_norm': 0.26087984442710876, 'learning_rate': 8.018627096711106e-05, 'epoch': 11.7}
|
1247 |
|
1248 |
+
|
1249 |
84%|βββββββββ | 8925/10682 [1:15:43<14:24, 2.03it/s]
|
1250 |
84%|βββββββββ | 8926/10682 [1:15:43<14:25, 2.03it/s]
|
1251 |
84%|βββββββββ | 8927/10682 [1:15:44<14:24, 2.03it/s]
|
1252 |
84%|βββββββββ | 8928/10682 [1:15:44<14:25, 2.03it/s]
|
1253 |
84%|βββββββββ | 8929/10682 [1:15:45<14:24, 2.03it/s]
|
1254 |
84%|βββββββββ | 8930/10682 [1:15:45<14:23, 2.03it/s]
|
1255 |
84%|βββββββββ | 8931/10682 [1:15:46<14:23, 2.03it/s]
|
1256 |
84%|βββββββββ | 8932/10682 [1:15:47<15:39, 1.86it/s]
|
1257 |
84%|βββββββββ | 8933/10682 [1:15:47<15:15, 1.91it/s]
|
1258 |
84%|βββββββββ | 8934/10682 [1:15:48<14:58, 1.95it/s]
|
1259 |
84%|βββββββββ | 8935/10682 [1:15:48<14:46, 1.97it/s]
|
1260 |
84%|βββββββββ | 8936/10682 [1:15:49<14:38, 1.99it/s]
|
1261 |
84%|βββββββββ | 8937/10682 [1:15:49<14:32, 2.00it/s]
|
1262 |
84%|βββββββββ | 8938/10682 [1:15:50<14:27, 2.01it/s]
|
1263 |
84%|ββββοΏ½οΏ½ββββ | 8939/10682 [1:15:50<14:24, 2.02it/s]
|
1264 |
84%|βββββββββ | 8940/10682 [1:15:51<14:22, 2.02it/s]
|
1265 |
84%|βββββββββ | 8941/10682 [1:15:51<14:20, 2.02it/s]
|
1266 |
84%|βββββββββ | 8942/10682 [1:15:51<14:19, 2.03it/s]
|
1267 |
84%|βββββββββ | 8943/10682 [1:15:52<14:17, 2.03it/s]
|
1268 |
84%|βββββββββ | 8944/10682 [1:15:52<14:16, 2.03it/s]
|
1269 |
84%|βββββββββ | 8945/10682 [1:15:53<14:15, 2.03it/s]
|
1270 |
84%|βββββββββ | 8946/10682 [1:15:53<14:15, 2.03it/s]
|
1271 |
84%|βββββββββ | 8947/10682 [1:15:54<14:14, 2.03it/s]
|
1272 |
84%|βββββββββ | 8948/10682 [1:15:54<14:13, 2.03it/s]
|
1273 |
84%|βββββββββ | 8949/10682 [1:15:55<14:13, 2.03it/s]
|
1274 |
84%|βββββββββ | 8950/10682 [1:15:55<14:12, 2.03it/s]{'loss': 2.8577, 'grad_norm': 0.2632240951061249, 'learning_rate': 7.798144214020909e-05, 'epoch': 11.73}
|
1275 |
|
1276 |
+
|
1277 |
84%|βββββββββ | 8950/10682 [1:15:55<14:12, 2.03it/s]
|
1278 |
84%|βββββββββ | 8951/10682 [1:15:56<14:14, 2.03it/s]
|
1279 |
84%|βββββββββ | 8952/10682 [1:15:56<14:12, 2.03it/s]
|
1280 |
84%|βββββββββ | 8953/10682 [1:15:57<14:12, 2.03it/s]
|
1281 |
84%|βββββββββ | 8954/10682 [1:15:57<14:11, 2.03it/s]
|
1282 |
84%|βββββββββ | 8955/10682 [1:15:58<14:10, 2.03it/s]
|
1283 |
84%|βββββββββ | 8956/10682 [1:15:58<14:10, 2.03it/s]
|
1284 |
84%|βββββββββ | 8957/10682 [1:15:59<14:10, 2.03it/s]
|
1285 |
84%|βββββββββ | 8958/10682 [1:15:59<14:07, 2.03it/s]
|
1286 |
84%|βββββββββ | 8959/10682 [1:16:00<14:07, 2.03it/s]
|
1287 |
84%|βββββββββ | 8960/10682 [1:16:00<14:08, 2.03it/s]
|
1288 |
84%|βββββββββ | 8961/10682 [1:16:01<14:08, 2.03it/s]
|
1289 |
84%|βββββββββ | 8962/10682 [1:16:01<14:07, 2.03it/s]
|
1290 |
84%|βββββββββ | 8963/10682 [1:16:02<14:07, 2.03it/s]
|
1291 |
84%|βββββββββ | 8964/10682 [1:16:02<14:06, 2.03it/s]
|
1292 |
84%|βββββββββ | 8965/10682 [1:16:03<14:05, 2.03it/s]
|
1293 |
84%|βββββββββ | 8966/10682 [1:16:03<14:05, 2.03it/s]
|
1294 |
84%|βββββββββ | 8967/10682 [1:16:04<14:05, 2.03it/s]
|
1295 |
84%|βββββββββ | 8968/10682 [1:16:04<14:05, 2.03it/s]
|
1296 |
84%|βββββββββ | 8969/10682 [1:16:05<14:04, 2.03it/s]
|
1297 |
84%|βββββββββ | 8970/10682 [1:16:05<14:03, 2.03it/s]
|
1298 |
84%|βββββββββ | 8971/10682 [1:16:06<14:03, 2.03it/s]
|
1299 |
84%|βββββββββ | 8972/10682 [1:16:06<14:02, 2.03it/s]
|
1300 |
84%|βββββββββ | 8973/10682 [1:16:07<14:02, 2.03it/s]
|
1301 |
84%|βββββββββ | 8974/10682 [1:16:07<14:01, 2.03it/s]
|
1302 |
84%|βββββββββ | 8975/10682 [1:16:08<14:00, 2.03it/s]
|
1303 |
|
1304 |
+
|
1305 |
84%|βββββββββ | 8975/10682 [1:16:08<14:00, 2.03it/s]
|
1306 |
84%|βββββββββ | 8976/10682 [1:16:08<14:00, 2.03it/s]
|
1307 |
84%|βββββββββ | 8977/10682 [1:16:09<13:59, 2.03it/s]
|
1308 |
84%|βββββββββ | 8978/10682 [1:16:09<13:59, 2.03it/s]
|
1309 |
84%|βββββββββ | 8979/10682 [1:16:10<13:58, 2.03it/s]
|
1310 |
84%|βββββββββ | 8980/10682 [1:16:10<13:58, 2.03it/s]
|
1311 |
84%|βββββββββ | 8981/10682 [1:16:11<13:57, 2.03it/s]
|
1312 |
84%|βββββββββ | 8982/10682 [1:16:11<13:57, 2.03it/s]
|
1313 |
84%|βββββββββ | 8983/10682 [1:16:12<13:57, 2.03it/s]
|
1314 |
84%|βββββββββ | 8984/10682 [1:16:12<13:56, 2.03it/s]
|
1315 |
84%|βββββββββ | 8985/10682 [1:16:13<13:56, 2.03it/s]
|
1316 |
84%|βββββββββ | 8986/10682 [1:16:13<13:55, 2.03it/s]
|
1317 |
84%|βββββββββ | 8987/10682 [1:16:14<13:54, 2.03it/s]
|
1318 |
84%|βββββββββ | 8988/10682 [1:16:14<13:55, 2.03it/s]
|
1319 |
84%|βββββββββ | 8989/10682 [1:16:15<13:53, 2.03it/s]
|
1320 |
84%|βββββββββ | 8990/10682 [1:16:15<13:52, 2.03it/s]
|
1321 |
84%|βββββββββ | 8991/10682 [1:16:16<13:53, 2.03it/s]
|
1322 |
84%|βββββββββ | 8992/10682 [1:16:16<13:52, 2.03it/s]
|
1323 |
84%|βββββββββ | 8993/10682 [1:16:17<13:52, 2.03it/s]
|
1324 |
84%|βββββββββ | 8994/10682 [1:16:17<13:51, 2.03it/s]
|
1325 |
84%|βββββββββ | 8995/10682 [1:16:18<13:50, 2.03it/s]
|
1326 |
84%|βββββββββ | 8996/10682 [1:16:18<13:51, 2.03it/s]
|
1327 |
84%|βββββββββ | 8997/10682 [1:16:19<13:49, 2.03it/s]
|
1328 |
84%|βββββββββ | 8998/10682 [1:16:19<13:48, 2.03it/s]
|
1329 |
84%|βββββββββ | 8999/10682 [1:16:20<13:48, 2.03it/s]
|
1330 |
84%|βββββββββ | 9000/10682 [1:16:20<13:47, 2.03it/s]
|
1331 |
{'loss': 2.8481, 'grad_norm': 0.2617539167404175, 'learning_rate': 7.365644059145782e-05, 'epoch': 11.8}
|
1332 |
+
|
1333 |
84%|βββββββββ | 9000/10682 [1:16:20<13:47, 2.03it/s]
|
1334 |
84%|βββββββββ | 9001/10682 [1:16:21<13:48, 2.03it/s]
|
1335 |
84%|βββββββββ | 9002/10682 [1:16:21<13:47, 2.03it/s]
|
1336 |
84%|βββββββββ | 9003/10682 [1:16:22<13:47, 2.03it/s]
|
1337 |
84%|βββββββββ | 9004/10682 [1:16:22<13:47, 2.03it/s]
|
1338 |
84%|βββββββββ | 9005/10682 [1:16:23<13:46, 2.03it/s]
|
1339 |
84%|βββββββββ | 9006/10682 [1:16:23<13:46, 2.03it/s]
|
1340 |
84%|βββββββββ | 9007/10682 [1:16:24<13:44, 2.03it/s]
|
1341 |
84%|βββββββββ | 9008/10682 [1:16:24<13:44, 2.03it/s]
|
1342 |
84%|βββββββββ | 9009/10682 [1:16:24<13:43, 2.03it/s]
|
1343 |
84%|βββββββββ | 9010/10682 [1:16:25<13:42, 2.03it/s]
|
1344 |
84%|βββββββββ | 9011/10682 [1:16:25<13:42, 2.03it/s]
|
1345 |
84%|βββββββββ | 9012/10682 [1:16:26<13:42, 2.03it/s]
|
1346 |
84%|βββββββββ | 9013/10682 [1:16:26<13:41, 2.03it/s]
|
1347 |
84%|βββββββββ | 9014/10682 [1:16:27<13:41, 2.03it/s]
|
1348 |
84%|βββββββββ | 9015/10682 [1:16:27<13:40, 2.03it/s]
|
1349 |
84%|βββββββββ | 9016/10682 [1:16:28<13:40, 2.03it/s]
|
1350 |
84%|βββββββββ | 9017/10682 [1:16:28<13:39, 2.03it/s]
|
1351 |
84%|βββββββββ | 9018/10682 [1:16:29<13:38, 2.03it/s]
|
1352 |
84%|βββββββββ | 9019/10682 [1:16:29<13:39, 2.03it/s]
|
1353 |
84%|βββββββββ | 9020/10682 [1:16:30<13:38, 2.03it/s]
|
1354 |
84%|βββββββββ | 9021/10682 [1:16:30<13:37, 2.03it/s]
|
1355 |
84%|βββββββββ | 9022/10682 [1:16:31<13:37, 2.03it/s]
|
1356 |
84%|βββββββββ | 9023/10682 [1:16:31<13:36, 2.03it/s]
|
1357 |
84%|βββββββββ | 9024/10682 [1:16:32<13:36, 2.03it/s]
|
1358 |
84%|βββββββββ | 9025/10682 [1:16:32<13:36, 2.03it/s]{'loss': 2.8558, 'grad_norm': 0.26141008734703064, 'learning_rate': 7.153655656890773e-05, 'epoch': 11.83}
|
1359 |
+
|
1360 |
|
1361 |
84%|βββββββββ | 9025/10682 [1:16:32<13:36, 2.03it/s]
|
1362 |
84%|βββββββββ | 9026/10682 [1:16:33<13:37, 2.03it/s]
|
1363 |
85%|βββββββββ | 9027/10682 [1:16:33<13:36, 2.03it/s]
|
1364 |
85%|βββββββββ | 9028/10682 [1:16:34<13:36, 2.02it/s]
|
1365 |
85%|βββββββββ | 9029/10682 [1:16:34<13:35, 2.03it/s]
|
1366 |
85%|βββββββββ | 9030/10682 [1:16:35<13:34, 2.03it/s]
|
1367 |
85%|βββββββββ | 9031/10682 [1:16:35<13:33, 2.03it/s]
|
1368 |
85%|βββββββββ | 9032/10682 [1:16:36<13:32, 2.03it/s]
|
1369 |
85%|βββββββββ | 9033/10682 [1:16:36<13:32, 2.03it/s]
|
1370 |
85%|βββββββββ | 9034/10682 [1:16:37<13:31, 2.03it/s]
|
1371 |
85%|βββββββββ | 9035/10682 [1:16:37<13:31, 2.03it/s]
|
1372 |
85%|βββββββββ | 9036/10682 [1:16:38<13:32, 2.03it/s]
|
1373 |
85%|βββββββββ | 9037/10682 [1:16:38<13:30, 2.03it/s]
|
1374 |
85%|βββββββββ | 9038/10682 [1:16:39<13:30, 2.03it/s]
|
1375 |
85%|βββββββββ | 9039/10682 [1:16:39<13:29, 2.03it/s]
|
1376 |
85%|βββββββββ | 9040/10682 [1:16:40<13:29, 2.03it/s]
|
1377 |
85%|βββββββββ | 9041/10682 [1:16:40<13:28, 2.03it/s]
|
1378 |
85%|βββββββββ | 9042/10682 [1:16:41<13:27, 2.03it/s]
|
1379 |
85%|βββββββββ | 9043/10682 [1:16:41<13:27, 2.03it/s]
|
1380 |
85%|βββββββββ | 9044/10682 [1:16:42<13:27, 2.03it/s]
|
1381 |
85%|βββββββββ | 9045/10682 [1:16:42<13:26, 2.03it/s]
|
1382 |
85%|βββββββββ | 9046/10682 [1:16:43<13:25, 2.03it/s]
|
1383 |
85%|βββββββββ | 9047/10682 [1:16:43<13:25, 2.03it/s]
|
1384 |
85%|βββββββββ | 9048/10682 [1:16:44<13:26, 2.03it/s]
|
1385 |
85%|βββββββββ | 9049/10682 [1:16:44<13:24, 2.03it/s]
|
1386 |
85%|βββββββββ | 9050/10682 [1:16:45<13:24, 2.03it/s]{'loss': 2.8601, 'grad_norm': 0.2692682147026062, 'learning_rate': 6.94452730207023e-05, 'epoch': 11.86}
|
1387 |
|
1388 |
+
|
1389 |
85%|βββββββββ | 9050/10682 [1:16:45<13:24, 2.03it/s]
|
1390 |
85%|βββββββββ | 9051/10682 [1:16:45<13:24, 2.03it/s]
|
1391 |
85%|βββββββββ | 9052/10682 [1:16:46<13:24, 2.03it/s]
|
1392 |
85%|βββββββββ | 9053/10682 [1:16:46<13:23, 2.03it/s]
|
1393 |
85%|βββββββββ | 9054/10682 [1:16:47<13:21, 2.03it/s]
|
1394 |
85%|βββββββββ | 9055/10682 [1:16:47<13:22, 2.03it/s]
|
1395 |
85%|βββββββββ | 9056/10682 [1:16:48<13:21, 2.03it/s]
|
1396 |
85%|βββββββββ | 9057/10682 [1:16:48<13:20, 2.03it/s]
|
1397 |
85%|βββββββββ | 9058/10682 [1:16:49<13:20, 2.03it/s]
|
1398 |
85%|βββββββββ | 9059/10682 [1:16:49<13:19, 2.03it/s]
|
1399 |
85%|βββββββββ | 9060/10682 [1:16:50<13:18, 2.03it/s]
|
1400 |
85%|βββββββββ | 9061/10682 [1:16:50<13:19, 2.03it/s]
|
1401 |
85%|βββββββββ | 9062/10682 [1:16:51<13:18, 2.03it/s]
|
1402 |
85%|βββββββοΏ½οΏ½β | 9063/10682 [1:16:51<13:17, 2.03it/s]
|
1403 |
85%|βββββββββ | 9064/10682 [1:16:52<13:18, 2.03it/s]
|
1404 |
85%|βββββββββ | 9065/10682 [1:16:52<13:17, 2.03it/s]
|
1405 |
85%|βββββββββ | 9066/10682 [1:16:53<13:17, 2.03it/s]
|
1406 |
85%|βββββββββ | 9067/10682 [1:16:53<13:16, 2.03it/s]
|
1407 |
85%|βββββββββ | 9068/10682 [1:16:54<13:15, 2.03it/s]
|
1408 |
85%|βββββββββ | 9069/10682 [1:16:54<13:15, 2.03it/s]
|
1409 |
85%|βββββββββ | 9070/10682 [1:16:55<13:14, 2.03it/s]
|
1410 |
85%|βββββββββ | 9071/10682 [1:16:55<13:13, 2.03it/s]
|
1411 |
85%|βββββββββ | 9072/10682 [1:16:56<13:14, 2.03it/s]
|
1412 |
85%|βββββββββ | 9073/10682 [1:16:56<13:13, 2.03it/s]
|
1413 |
85%|βββββββββ | 9074/10682 [1:16:57<13:12, 2.03it/s]
|
1414 |
85%|βββββββββ | 9075/10682 [1:16:57<13:11, 2.03it/s]
|
1415 |
{'loss': 2.8627, 'grad_norm': 0.2634871006011963, 'learning_rate': 6.738272954265156e-05, 'epoch': 11.89}
|
1416 |
+
|
1417 |
85%|βββββββββ | 9075/10682 [1:16:57<13:11, 2.03it/s]
|
1418 |
85%|βββββββββ | 9076/10682 [1:16:58<13:12, 2.03it/s]
|
1419 |
85%|βββββββββ | 9077/10682 [1:16:58<13:11, 2.03it/s]
|
1420 |
85%|βββββββββ | 9078/10682 [1:16:59<13:10, 2.03it/s]
|
1421 |
85%|βββββββββ | 9079/10682 [1:16:59<13:09, 2.03it/s]
|
1422 |
85%|βββββββββ | 9080/10682 [1:16:59<13:09, 2.03it/s]
|
1423 |
85%|βββββββββ | 9081/10682 [1:17:00<13:09, 2.03it/s]
|
1424 |
85%|βββββββββ | 9082/10682 [1:17:00<13:07, 2.03it/s]
|
1425 |
85%|βββββββββ | 9083/10682 [1:17:01<13:06, 2.03it/s]
|
1426 |
85%|βββββββββ | 9084/10682 [1:17:01<13:07, 2.03it/s]
|
1427 |
85%|βββββββββ | 9085/10682 [1:17:02<13:05, 2.03it/s]
|
1428 |
85%|βββββββββ | 9086/10682 [1:17:02<13:05, 2.03it/s]
|
1429 |
85%|βββββββββ | 9087/10682 [1:17:03<13:05, 2.03it/s]
|
1430 |
85%|βββββββββ | 9088/10682 [1:17:03<13:04, 2.03it/s]
|
1431 |
85%|βββββββββ | 9089/10682 [1:17:04<13:04, 2.03it/s]
|
1432 |
85%|βββββββββ | 9090/10682 [1:17:04<13:03, 2.03it/s]
|
1433 |
85%|βββββββββ | 9091/10682 [1:17:05<13:03, 2.03it/s]
|
1434 |
85%|βββββββββ | 9092/10682 [1:17:05<13:02, 2.03it/s]
|
1435 |
85%|βββββββββ | 9093/10682 [1:17:06<13:01, 2.03it/s]
|
1436 |
85%|βββββββββ | 9094/10682 [1:17:06<13:02, 2.03it/s]
|
1437 |
85%|βββββββββ | 9095/10682 [1:17:07<13:00, 2.03it/s]
|
1438 |
85%|βββββββββ | 9096/10682 [1:17:07<13:01, 2.03it/s]
|
1439 |
85%|βββββββββ | 9097/10682 [1:17:08<13:00, 2.03it/s]
|
1440 |
85%|βββββββββ | 9098/10682 [1:17:08<12:59, 2.03it/s]
|
1441 |
85%|βββββββββ | 9099/10682 [1:17:09<12:59, 2.03it/s]
|
1442 |
85%|βββββββββ | 9100/10682 [1:17:09<12:58, 2.03it/s]{'loss': 2.8535, 'grad_norm': 0.26476970314979553, 'learning_rate': 6.534906381212979e-05, 'epoch': 11.93}
|
1443 |
+
|
1444 |
|
1445 |
85%|βββββββββ | 9100/10682 [1:17:09<12:58, 2.03it/s]
|
1446 |
85%|βββββββββ | 9101/10682 [1:17:10<13:00, 2.03it/s]
|
1447 |
85%|βββββββββ | 9102/10682 [1:17:10<12:58, 2.03it/s]
|
1448 |
85%|βββββββββ | 9103/10682 [1:17:11<12:58, 2.03it/s]
|
1449 |
85%|βββββββββ | 9104/10682 [1:17:11<12:57, 2.03it/s]
|
1450 |
85%|βββββββββ | 9105/10682 [1:17:12<12:56, 2.03it/s]
|
1451 |
85%|βββββββββ | 9106/10682 [1:17:12<12:56, 2.03it/s]
|
1452 |
85%|βββββββββ | 9107/10682 [1:17:13<12:55, 2.03it/s]
|
1453 |
85%|βββββββββ | 9108/10682 [1:17:13<12:54, 2.03it/s]
|
1454 |
85%|βββββββββ | 9109/10682 [1:17:14<12:54, 2.03it/s]
|
1455 |
85%|βββββββββ | 9110/10682 [1:17:14<12:54, 2.03it/s]
|
1456 |
85%|βββββββββ | 9111/10682 [1:17:15<12:53, 2.03it/s]
|
1457 |
85%|βββββββββ | 9112/10682 [1:17:15<12:53, 2.03it/s]
|
1458 |
85%|βββββββββ | 9113/10682 [1:17:16<12:52, 2.03it/s]
|
1459 |
85%|βββββββββ | 9114/10682 [1:17:16<12:52, 2.03it/s]
|
1460 |
85%|βββββββββ | 9115/10682 [1:17:17<12:52, 2.03it/s]
|
1461 |
85%|βββββββββ | 9116/10682 [1:17:17<12:52, 2.03it/s]
|
1462 |
85%|βββββββββ | 9117/10682 [1:17:18<12:51, 2.03it/s]
|
1463 |
85%|βββββββββ | 9118/10682 [1:17:18<12:51, 2.03it/s]
|
1464 |
85%|βββββββββ | 9119/10682 [1:17:19<12:50, 2.03it/s]
|
1465 |
85%|βββββββββ | 9120/10682 [1:17:19<12:49, 2.03it/s]
|
1466 |
85%|βββββββββ | 9121/10682 [1:17:20<12:49, 2.03it/s]
|
1467 |
85%|βββββββββ | 9122/10682 [1:17:20<12:48, 2.03it/s]
|
1468 |
85%|βββββββββ | 9123/10682 [1:17:21<12:48, 2.03it/s]
|
1469 |
85%|βββββββββ | 9124/10682 [1:17:21<12:47, 2.03it/s]
|
1470 |
85%|βββββββββ | 9125/10682 [1:17:22<12:47, 2.03it/s]{'loss': 2.8536, 'grad_norm': 0.26291805505752563, 'learning_rate': 6.334441157888504e-05, 'epoch': 11.96}
|
1471 |
+
|
1472 |
|
1473 |
85%|βββββββββ | 9125/10682 [1:17:22<12:47, 2.03it/s]
|
1474 |
85%|βββββββββ | 9126/10682 [1:17:22<12:47, 2.03it/s]
|
1475 |
85%|βββββββββ | 9127/10682 [1:17:23<12:46, 2.03it/s]
|
1476 |
85%|βββββββββ | 9128/10682 [1:17:23<12:45, 2.03it/s]
|
1477 |
85%|βββββββββ | 9129/10682 [1:17:24<12:45, 2.03it/s]
|
1478 |
85%|βββββββββ | 9130/10682 [1:17:24<12:44, 2.03it/s]
|
1479 |
85%|βββββββββ | 9131/10682 [1:17:25<12:44, 2.03it/s]
|
1480 |
85%|βββββββββ | 9132/10682 [1:17:25<12:43, 2.03it/s]
|
1481 |
85%|βββββββββ | 9133/10682 [1:17:26<12:43, 2.03it/s]
|
1482 |
86%|βββββββββ | 9134/10682 [1:17:26<12:42, 2.03it/s]
|
1483 |
86%|βββββββββ | 9135/10682 [1:17:27<12:41, 2.03it/s]
|
1484 |
86%|βββββββββ | 9136/10682 [1:17:27<12:41, 2.03it/s]
|
1485 |
86%|βββββββββ | 9137/10682 [1:17:28<12:40, 2.03it/s]
|
1486 |
86%|βββββββββ | 9138/10682 [1:17:28<12:40, 2.03it/s]
|
1487 |
86%|βββββββββ | 9139/10682 [1:17:29<12:40, 2.03it/s]
|
1488 |
86%|βββββββββ | 9140/10682 [1:17:29<12:38, 2.03it/s]
|
1489 |
86%|βββββββββ | 9141/10682 [1:17:30<12:38, 2.03it/s]
|
1490 |
86%|βββββββββ | 9142/10682 [1:17:30<12:38, 2.03it/s]
|
1491 |
86%|βββββββββ | 9143/10682 [1:17:31<12:37, 2.03it/s]
|
1492 |
86%|βββββββββ | 9144/10682 [1:17:31<12:37, 2.03it/s]
|
1493 |
86%|βββββββββ | 9145/10682 [1:17:32<12:36, 2.03it/s]
|
1494 |
86%|βββββββββ | 9146/10682 [1:17:32<12:35, 2.03it/s]
|
1495 |
86%|βββββββββ | 9147/10682 [1:17:32<12:36, 2.03it/s]
|
1496 |
86%|βββββββββ | 9148/10682 [1:17:33<12:35, 2.03it/s]
|
1497 |
86%|βββββββββ | 9149/10682 [1:17:33<12:35, 2.03it/s]
|
1498 |
86%|βββββββββ | 9150/10682 [1:17:34<12:34, 2.03it/s]{'loss': 2.8583, 'grad_norm': 0.2615031898021698, 'learning_rate': 6.1368906655978e-05, 'epoch': 11.99}
|
1499 |
|
1500 |
+
|
1501 |
86%|βββββββββ | 9150/10682 [1:17:34<12:34, 2.03it/s]
|
1502 |
86%|βββββββββ | 9151/10682 [1:17:34<12:35, 2.03it/s]
|
1503 |
86%|βββββββββ | 9152/10682 [1:17:35<12:34, 2.03it/s]
|
1504 |
86%|βββββββββ | 9153/10682 [1:17:35<12:33, 2.03it/s]
|
1505 |
86%|βββββββββ | 9154/10682 [1:17:36<12:33, 2.03it/s]
|
1506 |
86%|βββββββββ | 9155/10682 [1:17:36<12:32, 2.03it/s]
|
1507 |
86%|βββββββββ | 9156/10682 [1:17:37<12:25, 2.05it/s]
|
1508 |
86%|βββββββββ | 9157/10682 [1:17:52<2:01:47, 4.79s/it]
|
1509 |
86%|βββββββββ | 9158/10682 [1:17:52<1:29:01, 3.50s/it]
|
1510 |
86%|βββββββββ | 9159/10682 [1:17:53<1:06:01, 2.60s/it]
|
1511 |
86%|βββββββββ | 9160/10682 [1:17:53<50:00, 1.97s/it]
|
1512 |
86%|βββββββββ | 9161/10682 [1:17:54<38:43, 1.53s/it]
|
1513 |
86%|βββββββββ | 9162/10682 [1:17:54<30:50, 1.22s/it]
|
1514 |
86%|βββββββββ | 9163/10682 [1:17:55<25:20, 1.00s/it]
|
1515 |
86%|βββββββββ | 9164/10682 [1:17:55<21:28, 1.18it/s]
|
1516 |
86%|βββββββββ | 9165/10682 [1:17:56<18:44, 1.35it/s]
|
1517 |
86%|βββββββββ | 9166/10682 [1:17:56<16:50, 1.50it/s]
|
1518 |
86%|βββββββββ | 9167/10682 [1:17:57<15:32, 1.63it/s]
|
1519 |
86%|βββββββββ | 9168/10682 [1:17:57<14:35, 1.73it/s]
|
1520 |
86%|βββββββββ | 9169/10682 [1:17:58<13:55, 1.81it/s]
|
1521 |
86%|βββββββββ | 9170/10682 [1:17:58<13:28, 1.87it/s]
|
1522 |
86%|βββββββββ | 9171/10682 [1:17:59<13:07, 1.92it/s]
|
1523 |
86%|βββββββββ | 9172/10682 [1:17:59<12:54, 1.95it/s]
|
1524 |
86%|βββββββββ | 9173/10682 [1:18:00<12:45, 1.97it/s]
|
1525 |
86%|βββββββββ | 9174/10682 [1:18:00<12:38, 1.99it/s]
|
1526 |
86%|βββββββββ | 9175/10682 [1:18:01<12:32, 2.00it/s]
|
1527 |
|
1528 |
+
|
1529 |
86%|βββββββββ | 9175/10682 [1:18:01<12:32, 2.00it/s]
|
1530 |
86%|βββββββββ | 9176/10682 [1:18:01<12:30, 2.01it/s]
|
1531 |
86%|βββββββββ | 9177/10682 [1:18:02<12:26, 2.01it/s]
|
1532 |
86%|βββββββββ | 9178/10682 [1:18:02<12:24, 2.02it/s]
|
1533 |
86%|βββββββββ | 9179/10682 [1:18:03<12:23, 2.02it/s]
|
1534 |
86%|βββββββββ | 9180/10682 [1:18:03<12:22, 2.02it/s]
|
1535 |
86%|βββββββββ | 9181/10682 [1:18:04<12:21, 2.02it/s]
|
1536 |
86%|βββββββββ | 9182/10682 [1:18:04<12:20, 2.03it/s]
|
1537 |
86%|βββββββββ | 9183/10682 [1:18:05<12:19, 2.03it/s]
|
1538 |
86%|βββββββββ | 9184/10682 [1:18:05<12:19, 2.03it/s]
|
1539 |
86%|βββββββββ | 9185/10682 [1:18:06<12:19, 2.03it/s]
|
1540 |
86%|βββββββββ | 9186/10682 [1:18:06<12:17, 2.03it/s]
|
1541 |
86%|βββββββββ | 9187/10682 [1:18:07<12:15, 2.03it/s]
|
1542 |
86%|βββββββββ | 9188/10682 [1:18:07<12:15, 2.03it/s]
|