Training in progress, epoch 10
Browse files- logs/events.out.tfevents.1716583437.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +33 -1
logs/events.out.tfevents.1716583437.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cd93c92e2c16a7746479214ec76207f9f52b5086780b4d21bfcd1b4f81d0399
|
3 |
+
size 78276
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9cd819eece9b23094227d17f28bc3f6ef50891a1b71638a9875dc4cf000bd69
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -536,4 +536,36 @@ command outputs:
|
|
536 |
|
537 |
71%|βββββββ | 7850/11074 [1:06:57<26:33, 2.02it/s]
|
538 |
71%|βββββββ | 7851/11074 [1:06:58<26:34, 2.02it/s]
|
539 |
71%|βββββββ | 7852/11074 [1:06:58<26:34, 2.02it/s]
|
540 |
71%|βββββββ | 7853/11074 [1:06:59<26:32, 2.02it/s]
|
541 |
71%|βββββββ | 7854/11074 [1:06:59<26:32, 2.02it/s]
|
542 |
71%|βββββββ | 7855/11074 [1:07:00<26:30, 2.02it/s]
|
543 |
71%|βββββββ | 7856/11074 [1:07:00<26:30, 2.02it/s]
|
544 |
71%|βββββββ | 7857/11074 [1:07:01<26:28, 2.02it/s]
|
545 |
71%|βββββββ | 7858/11074 [1:07:01<26:29, 2.02it/s]
|
546 |
71%|βββββββ | 7859/11074 [1:07:02<26:27, 2.02it/s]
|
547 |
71%|βββββββ | 7860/11074 [1:07:02<26:27, 2.02it/s]
|
548 |
71%|βββββββ | 7861/11074 [1:07:03<26:27, 2.02it/s]
|
549 |
71%|βββββββ | 7862/11074 [1:07:03<26:26, 2.02it/s]
|
550 |
71%|βββββββ | 7863/11074 [1:07:04<26:26, 2.02it/s]
|
551 |
71%|βββββββ | 7864/11074 [1:07:04<26:24, 2.03it/s]
|
552 |
71%|βββββββ | 7865/11074 [1:07:05<26:23, 2.03it/s]
|
553 |
71%|βββββββ | 7866/11074 [1:07:05<26:23, 2.03it/s]
|
554 |
71%|βββββββ | 7867/11074 [1:07:06<26:23, 2.02it/s]
|
555 |
71%|βββββββ | 7868/11074 [1:07:06<26:22, 2.03it/s]
|
556 |
71%|βββββββ | 7869/11074 [1:07:07<26:21, 2.03it/s]
|
557 |
71%|βββββββ | 7870/11074 [1:07:07<26:21, 2.03it/s]
|
558 |
71%|βββββββ | 7871/11074 [1:07:07<26:20, 2.03it/s]
|
559 |
71%|βββββββ | 7872/11074 [1:07:08<26:20, 2.03it/s]
|
560 |
71%|βββββββ | 7873/11074 [1:07:08<26:20, 2.03it/s]
|
561 |
71%|βββββββ | 7874/11074 [1:07:09<26:19, 2.03it/s]
|
562 |
71%|βββββββ | 7875/11074 [1:07:09<26:19, 2.03it/s]
|
563 |
{'loss': 3.3077, 'grad_norm': 0.23198536038398743, 'learning_rate': 0.0002334027221578824, 'epoch': 9.95}
|
564 |
|
565 |
71%|βββββββ | 7875/11074 [1:07:09<26:19, 2.03it/s]
|
566 |
71%|βββββββ | 7876/11074 [1:07:10<26:22, 2.02it/s]
|
567 |
71%|βββββββ | 7877/11074 [1:07:10<26:21, 2.02it/s]
|
568 |
71%|βββββββ | 7878/11074 [1:07:11<26:21, 2.02it/s]
|
569 |
71%|βββββββ | 7879/11074 [1:07:11<26:20, 2.02it/s]
|
570 |
71%|βββββββ | 7880/11074 [1:07:12<26:18, 2.02it/s]
|
571 |
71%|βββββββ | 7881/11074 [1:07:12<26:18, 2.02it/s]
|
572 |
71%|βββββββ | 7882/11074 [1:07:13<26:18, 2.02it/s]
|
573 |
71%|βββββββ | 7883/11074 [1:07:13<26:15, 2.02it/s]
|
574 |
71%|βββββββ | 7884/11074 [1:07:14<26:15, 2.02it/s]
|
575 |
71%|βββββββ | 7885/11074 [1:07:14<26:13, 2.03it/s]
|
576 |
71%|βββββββ | 7886/11074 [1:07:15<26:13, 2.03it/s]
|
577 |
71%|βββββββ | 7887/11074 [1:07:15<26:13, 2.03it/s]
|
578 |
71%|βββββββ | 7888/11074 [1:07:16<26:11, 2.03it/s]
|
579 |
71%|βββββββ | 7889/11074 [1:07:16<26:12, 2.02it/s]
|
580 |
71%|βββββββ | 7890/11074 [1:07:17<26:10, 2.03it/s]
|
581 |
71%|ββββββββ | 7891/11074 [1:07:17<26:11, 2.02it/s]
|
582 |
71%|ββββββββ | 7892/11074 [1:07:18<26:08, 2.03it/s]
|
583 |
71%|ββββββββ | 7893/11074 [1:07:18<26:10, 2.03it/s]
|
584 |
71%|ββββββββ | 7894/11074 [1:07:19<26:10, 2.03it/s]
|
585 |
71%|ββββββββ | 7895/11074 [1:07:19<26:09, 2.03it/s]
|
586 |
71%|ββββββββ | 7896/11074 [1:07:20<26:09, 2.03it/s]
|
587 |
71%|ββββββββ | 7897/11074 [1:07:20<26:09, 2.02it/s]
|
588 |
71%|ββββββββ | 7898/11074 [1:07:21<26:08, 2.03it/s]
|
589 |
71%|ββββββββ | 7899/11074 [1:07:21<26:06, 2.03it/s]
|
590 |
71%|ββββββββ | 7900/11074 [1:07:22<26:06, 2.03it/s]{'loss': 3.3093, 'grad_norm': 0.23056922852993011, 'learning_rate': 0.00023007749594435663, 'epoch': 9.98}
|
591 |
|
592 |
|
593 |
71%|ββββββββ | 7900/11074 [1:07:22<26:06, 2.03it/s]
|
594 |
71%|ββββββββ | 7901/11074 [1:07:22<26:09, 2.02it/s]
|
595 |
71%|ββββββββ | 7902/11074 [1:07:23<26:09, 2.02it/s]
|
596 |
71%|ββββββββ | 7903/11074 [1:07:23<26:07, 2.02it/s]
|
597 |
71%|ββββββββ | 7904/11074 [1:07:24<26:06, 2.02it/s]
|
598 |
71%|ββββββββ | 7905/11074 [1:07:24<26:05, 2.02it/s]
|
599 |
71%|ββββββββ | 7906/11074 [1:07:25<26:03, 2.03it/s]
|
600 |
71%|ββββββββ | 7907/11074 [1:07:25<26:03, 2.03it/s]
|
601 |
71%|ββββββββ | 7908/11074 [1:07:26<26:01, 2.03it/s]
|
602 |
71%|ββββββββ | 7909/11074 [1:07:26<26:01, 2.03it/s]
|
603 |
71%|ββββββββ | 7910/11074 [1:07:27<26:01, 2.03it/s]
|
604 |
71%|ββββββββ | 7911/11074 [1:07:27<25:59, 2.03it/s]
|
605 |
71%|ββββββββ | 7912/11074 [1:07:28<25:59, 2.03it/s]
|
606 |
71%|ββββββββ | 7913/11074 [1:07:28<25:58, 2.03it/s]
|
607 |
71%|ββββββββ | 7914/11074 [1:07:29<26:00, 2.03it/s]
|
608 |
71%|ββββββββ | 7915/11074 [1:07:29<25:45, 2.04it/s]
|
609 |
71%|βοΏ½οΏ½οΏ½ββββββ | 7916/11074 [1:07:41<3:26:15, 3.92s/it]
|
610 |
71%|ββββββββ | 7917/11074 [1:07:42<2:32:17, 2.89s/it]
|
611 |
72%|ββββββββ | 7918/11074 [1:07:42<1:54:19, 2.17s/it]
|
612 |
72%|ββββββββ | 7919/11074 [1:07:43<1:27:47, 1.67s/it]
|
613 |
72%|ββββββββ | 7920/11074 [1:07:43<1:09:13, 1.32s/it]
|
614 |
72%|ββββββββ | 7921/11074 [1:07:44<56:14, 1.07s/it]
|
615 |
72%|ββββββββ | 7922/11074 [1:07:44<47:07, 1.11it/s]
|
616 |
72%|ββββββββ | 7923/11074 [1:07:45<40:57, 1.28it/s]
|
617 |
72%|ββββββββ | 7924/11074 [1:07:45<36:26, 1.44it/s]
|
618 |
72%|ββββββββ | 7925/11074 [1:07:46<33:16, 1.58it/s]
|
619 |
|
620 |
-
|
621 |
72%|ββββββββ | 7925/11074 [1:07:46<33:16, 1.58it/s]
|
622 |
72%|ββββββββ | 7926/11074 [1:07:46<31:04, 1.69it/s]
|
623 |
72%|ββββββββ | 7927/11074 [1:07:47<29:32, 1.78it/s]
|
624 |
72%|ββββββββ | 7928/11074 [1:07:47<28:29, 1.84it/s]
|
625 |
72%|ββββββββ | 7929/11074 [1:07:48<27:42, 1.89it/s]
|
626 |
72%|ββββββββ | 7930/11074 [1:07:48<27:10, 1.93it/s]
|
627 |
72%|ββββββββ | 7931/11074 [1:07:49<26:47, 1.96it/s]
|
628 |
72%|ββββββββ | 7932/11074 [1:07:49<26:30, 1.98it/s]
|
629 |
72%|ββββββββ | 7933/11074 [1:07:50<26:19, 1.99it/s]
|
630 |
72%|ββββββββ | 7934/11074 [1:07:50<26:10, 2.00it/s]
|
631 |
72%|ββββββββ | 7935/11074 [1:07:51<26:03, 2.01it/s]
|
632 |
72%|ββββββββ | 7936/11074 [1:07:51<25:59, 2.01it/s]
|
633 |
72%|ββββββββ | 7937/11074 [1:07:52<25:55, 2.02it/s]
|
634 |
72%|ββββββββ | 7938/11074 [1:07:52<25:52, 2.02it/s]
|
635 |
72%|ββββββββ | 7939/11074 [1:07:53<25:50, 2.02it/s]
|
636 |
72%|ββββββββ | 7940/11074 [1:07:53<25:54, 2.02it/s]
|
|
|
637 |
72%|ββββββββ | 7925/11074 [1:07:46<33:16, 1.58it/s]
|
638 |
72%|ββββββββ | 7926/11074 [1:07:46<31:04, 1.69it/s]
|
639 |
72%|ββββββββ | 7927/11074 [1:07:47<29:32, 1.78it/s]
|
640 |
72%|ββββββββ | 7928/11074 [1:07:47<28:29, 1.84it/s]
|
641 |
72%|ββββββββ | 7929/11074 [1:07:48<27:42, 1.89it/s]
|
642 |
72%|ββββββββ | 7930/11074 [1:07:48<27:10, 1.93it/s]
|
643 |
72%|ββββββββ | 7931/11074 [1:07:49<26:47, 1.96it/s]
|
644 |
72%|ββββββββ | 7932/11074 [1:07:49<26:30, 1.98it/s]
|
645 |
72%|ββββββββ | 7933/11074 [1:07:50<26:19, 1.99it/s]
|
646 |
72%|ββββββββ | 7934/11074 [1:07:50<26:10, 2.00it/s]
|
647 |
72%|ββββββββ | 7935/11074 [1:07:51<26:03, 2.01it/s]
|
648 |
72%|ββββββββ | 7936/11074 [1:07:51<25:59, 2.01it/s]
|
649 |
72%|ββββββββ | 7937/11074 [1:07:52<25:55, 2.02it/s]
|
650 |
72%|ββββββββ | 7938/11074 [1:07:52<25:52, 2.02it/s]
|
651 |
72%|ββββββββ | 7939/11074 [1:07:53<25:50, 2.02it/s]
|
652 |
72%|ββββββββ | 7940/11074 [1:07:53<25:54, 2.02it/s]
|
653 |
72%|ββββββββ | 7941/11074 [1:07:54<25:57, 2.01it/s]
|
654 |
72%|ββββββββ | 7942/11074 [1:07:54<25:51, 2.02it/s]
|
655 |
72%|ββββββββ | 7943/11074 [1:07:55<25:51, 2.02it/s]
|
656 |
72%|ββββββββ | 7944/11074 [1:07:55<25:47, 2.02it/s]
|
657 |
72%|ββββββββ | 7945/11074 [1:07:55<25:47, 2.02it/s]
|
658 |
72%|ββββββββ | 7946/11074 [1:07:56<25:44, 2.03it/s]
|
659 |
72%|ββββββββ | 7947/11074 [1:07:56<25:43, 2.03it/s]
|
660 |
72%|ββββββββ | 7948/11074 [1:07:57<25:43, 2.02it/s]
|
661 |
72%|ββββββββ | 7949/11074 [1:07:57<25:41, 2.03it/s]
|
662 |
72%|ββββββββ | 7950/11074 [1:07:58<25:42, 2.03it/s]{'loss': 3.2027, 'grad_norm': 0.23849323391914368, 'learning_rate': 0.00022347754066845987, 'epoch': 10.04}
|
663 |
|
|
|
664 |
72%|ββββββββ | 7950/11074 [1:07:58<25:42, 2.03it/s]
|
665 |
72%|ββββββββ | 7951/11074 [1:07:58<25:44, 2.02it/s]
|
666 |
72%|ββββββββ | 7952/11074 [1:07:59<25:42, 2.02it/s]
|
667 |
72%|ββββββββ | 7953/11074 [1:07:59<25:40, 2.03it/s]
|
668 |
72%|ββββββββ | 7954/11074 [1:08:00<25:40, 2.03it/s]
|
669 |
72%|ββββββββ | 7955/11074 [1:08:00<25:38, 2.03it/s]
|
670 |
72%|ββββββββ | 7956/11074 [1:08:01<25:38, 2.03it/s]
|
671 |
72%|ββββββββ | 7957/11074 [1:08:01<25:38, 2.03it/s]
|
672 |
72%|ββββββββ | 7958/11074 [1:08:02<25:38, 2.02it/s]
|
673 |
72%|ββββββββ | 7959/11074 [1:08:02<25:39, 2.02it/s]
|
674 |
72%|ββββββββ | 7960/11074 [1:08:03<25:36, 2.03it/s]
|
675 |
72%|ββββββββ | 7961/11074 [1:08:03<25:36, 2.03it/s]
|
676 |
72%|ββββββββ | 7962/11074 [1:08:04<25:35, 2.03it/s]
|
677 |
72%|ββββββββ | 7963/11074 [1:08:04<25:35, 2.03it/s]
|
678 |
72%|ββββββββ | 7964/11074 [1:08:05<25:34, 2.03it/s]
|
679 |
72%|ββββββββ | 7965/11074 [1:08:05<25:34, 2.03it/s]
|
680 |
72%|ββββββββ | 7966/11074 [1:08:06<25:35, 2.02it/s]
|
681 |
72%|ββββββββ | 7967/11074 [1:08:06<25:35, 2.02it/s]
|
682 |
72%|ββββββββ | 7968/11074 [1:08:07<25:35, 2.02it/s]
|
683 |
72%|ββββββββ | 7969/11074 [1:08:07<25:34, 2.02it/s]
|
684 |
72%|ββββββββ | 7970/11074 [1:08:08<25:33, 2.02it/s]
|
685 |
72%|ββββββββ | 7971/11074 [1:08:08<25:33, 2.02it/s]
|
686 |
72%|ββββββββ | 7972/11074 [1:08:09<25:32, 2.02it/s]
|
687 |
72%|ββββββββ | 7973/11074 [1:08:09<25:30, 2.03it/s]
|
688 |
72%|ββββββββ | 7974/11074 [1:08:10<25:30, 2.03it/s]
|
689 |
72%|ββββββββ | 7975/11074 [1:08:10<25:29, 2.03it/s]{'loss': 3.1925, 'grad_norm': 0.2428920567035675, 'learning_rate': 0.00022020322150497878, 'epoch': 10.08}
|
690 |
|
|
|
691 |
72%|ββββββββ | 7975/11074 [1:08:10<25:29, 2.03it/s]
|
692 |
72%|ββββββββ | 7976/11074 [1:08:11<25:30, 2.02it/s]
|
693 |
72%|ββββββββ | 7977/11074 [1:08:11<25:29, 2.02it/s]
|
694 |
72%|ββββββββ | 7978/11074 [1:08:12<25:30, 2.02it/s]
|
695 |
72%|ββββββββ | 7979/11074 [1:08:12<25:29, 2.02it/s]
|
696 |
72%|ββββββββ | 7980/11074 [1:08:13<25:29, 2.02it/s]
|
697 |
72%|ββββββββ | 7981/11074 [1:08:13<25:28, 2.02it/s]
|
698 |
72%|ββββββββ | 7982/11074 [1:08:14<25:26, 2.03it/s]
|
699 |
72%|ββββββββ | 7983/11074 [1:08:14<25:27, 2.02it/s]
|
700 |
72%|ββββββββ | 7984/11074 [1:08:15<25:24, 2.03it/s]
|
701 |
72%|ββββββββ | 7985/11074 [1:08:15<25:25, 2.03it/s]
|
702 |
72%|ββββββββ | 7986/11074 [1:08:16<25:23, 2.03it/s]
|
703 |
72%|ββββββββ | 7987/11074 [1:08:16<25:24, 2.03it/s]
|
704 |
72%|ββββββββ | 7988/11074 [1:08:17<25:23, 2.03it/s]
|
705 |
72%|ββββββββ | 7989/11074 [1:08:17<25:20, 2.03it/s]
|
706 |
72%|ββββββββ | 7990/11074 [1:08:18<25:22, 2.03it/s]
|
707 |
72%|ββββββββ | 7991/11074 [1:08:18<25:21, 2.03it/s]
|
708 |
72%|ββββββββ | 7992/11074 [1:08:19<25:22, 2.02it/s]
|
709 |
72%|ββββββββ | 7993/11074 [1:08:19<25:20, 2.03it/s]
|
710 |
72%|ββββββββ | 7994/11074 [1:08:20<25:21, 2.02it/s]
|
711 |
72%|ββββββββ | 7995/11074 [1:08:20<25:20, 2.03it/s]
|
712 |
72%|ββββββββ | 7996/11074 [1:08:21<25:19, 2.03it/s]
|
713 |
72%|ββββββββ | 7997/11074 [1:08:21<25:19, 2.02it/s]
|
714 |
72%|ββββββββ | 7998/11074 [1:08:22<25:17, 2.03it/s]
|
715 |
72%|ββββββββ | 7999/11074 [1:08:22<25:18, 2.03it/s]
|
716 |
72%|ββββββββ | 8000/11074 [1:08:23<25:15, 2.03it/s]
|
717 |
{'loss': 3.2116, 'grad_norm': 0.23470519483089447, 'learning_rate': 0.00021694627948786466, 'epoch': 10.11}
|
|
|
718 |
72%|ββββββββ | 8000/11074 [1:08:23<25:15, 2.03it/s]
|
719 |
72%|ββββββββ | 8001/11074 [1:08:23<25:18, 2.02it/s]
|
720 |
72%|ββββββββ | 8002/11074 [1:08:24<25:17, 2.02it/s]
|
721 |
72%|ββββββββ | 8003/11074 [1:08:24<25:17, 2.02it/s]
|
722 |
72%|ββββββββ | 8004/11074 [1:08:25<25:16, 2.02it/s]
|
723 |
72%|ββββββββ | 8005/11074 [1:08:25<25:16, 2.02it/s]
|
724 |
72%|ββββββββ | 8006/11074 [1:08:26<25:16, 2.02it/s]
|
725 |
72%|ββββββββ | 8007/11074 [1:08:26<25:15, 2.02it/s]
|
726 |
72%|ββββββββ | 8008/11074 [1:08:27<25:14, 2.03it/s]
|
727 |
72%|ββββββββ | 8009/11074 [1:08:27<25:12, 2.03it/s]
|
728 |
72%|ββββββββ | 8010/11074 [1:08:28<25:12, 2.03it/s]
|
729 |
72%|ββββββββ | 8011/11074 [1:08:28<25:11, 2.03it/s]
|
730 |
72%|ββββββββ | 8012/11074 [1:08:29<25:11, 2.03it/s]
|
731 |
72%|ββββββββ | 8013/11074 [1:08:29<25:11, 2.02it/s]
|
732 |
72%|ββββββββ | 8014/11074 [1:08:30<25:11, 2.02it/s]
|
733 |
72%|ββββββββ | 8015/11074 [1:08:30<25:10, 2.02it/s]
|
734 |
72%|ββββββββ | 8016/11074 [1:08:31<25:10, 2.02it/s]
|
735 |
72%|ββββββββ | 8017/11074 [1:08:31<25:10, 2.02it/s]
|
736 |
72%|ββββββββ | 8018/11074 [1:08:32<25:08, 2.03it/s]
|
737 |
72%|ββββββββ | 8019/11074 [1:08:32<25:10, 2.02it/s]
|
738 |
72%|ββββββββ | 8020/11074 [1:08:33<25:07, 2.03it/s]
|
739 |
72%|ββββββββ | 8021/11074 [1:08:33<25:08, 2.02it/s]
|
740 |
72%|ββββββββ | 8022/11074 [1:08:34<25:06, 2.03it/s]
|
741 |
72%|ββββββββ | 8023/11074 [1:08:34<25:07, 2.02it/s]
|
742 |
72%|ββββββββ | 8024/11074 [1:08:34<25:07, 2.02it/s]
|
743 |
72%|ββββββββ | 8025/11074 [1:08:35<25:07, 2.02it/s]
|
744 |
{'loss': 3.2031, 'grad_norm': 0.24160942435264587, 'learning_rate': 0.00021370691689377887, 'epoch': 10.14}
|
|
|
745 |
72%|ββββββββ | 8025/11074 [1:08:35<25:07, 2.02it/s]
|
746 |
72%|ββββββββ | 8026/11074 [1:08:35<25:08, 2.02it/s]
|
747 |
72%|ββοΏ½οΏ½οΏ½βββββ | 8027/11074 [1:08:36<25:06, 2.02it/s]
|
748 |
72%|ββββββββ | 8028/11074 [1:08:36<25:05, 2.02it/s]
|
749 |
73%|ββββββββ | 8029/11074 [1:08:37<25:04, 2.02it/s]
|
750 |
73%|ββββββββ | 8030/11074 [1:08:37<25:03, 2.02it/s]
|
751 |
73%|ββββββββ | 8031/11074 [1:08:38<25:02, 2.03it/s]
|
752 |
73%|ββββββββ | 8032/11074 [1:08:38<25:02, 2.02it/s]
|
753 |
73%|ββββββββ | 8033/11074 [1:08:39<25:03, 2.02it/s]
|
754 |
73%|ββββββββ | 8034/11074 [1:08:39<25:02, 2.02it/s]
|
755 |
73%|ββββββββ | 8035/11074 [1:08:40<25:01, 2.02it/s]
|
756 |
73%|ββββββββ | 8036/11074 [1:08:40<25:01, 2.02it/s]
|
757 |
73%|ββββββββ | 8037/11074 [1:08:41<24:59, 2.03it/s]
|
758 |
73%|ββββββββ | 8038/11074 [1:08:41<24:59, 2.02it/s]
|
759 |
73%|ββββββββ | 8039/11074 [1:08:42<24:58, 2.03it/s]
|
760 |
73%|ββββββββ | 8040/11074 [1:08:42<24:58, 2.02it/s]
|
761 |
73%|ββββββββ | 8041/11074 [1:08:43<24:58, 2.02it/s]
|
762 |
73%|ββββββββ | 8042/11074 [1:08:43<24:57, 2.03it/s]
|
763 |
73%|ββββββββ | 8043/11074 [1:08:44<25:14, 2.00it/s]
|
764 |
73%|ββββββββ | 8044/11074 [1:08:44<25:08, 2.01it/s]
|
765 |
73%|ββββββββ | 8045/11074 [1:08:45<25:03, 2.01it/s]
|
766 |
73%|ββββββββ | 8046/11074 [1:08:45<25:00, 2.02it/s]
|
767 |
73%|ββββββββ | 8047/11074 [1:08:46<24:58, 2.02it/s]
|
768 |
73%|ββββββββ | 8048/11074 [1:08:46<24:57, 2.02it/s]
|
769 |
73%|ββββββββ | 8049/11074 [1:08:47<24:57, 2.02it/s]
|
770 |
73%|ββββββββ | 8050/11074 [1:08:47<24:55, 2.02it/s]
|
771 |
|
|
|
772 |
73%|ββββββββ | 8050/11074 [1:08:47<24:55, 2.02it/s]
|
773 |
73%|ββββββββ | 8051/11074 [1:08:48<24:56, 2.02it/s]
|
774 |
73%|ββββββββ | 8052/11074 [1:08:48<24:55, 2.02it/s]
|
775 |
73%|ββββββββ | 8053/11074 [1:08:49<24:55, 2.02it/s]
|
776 |
73%|ββββββββ | 8054/11074 [1:08:49<24:53, 2.02it/s]
|
777 |
73%|ββββββββ | 8055/11074 [1:08:50<24:52, 2.02it/s]
|
778 |
73%|ββββββββ | 8056/11074 [1:08:50<24:52, 2.02it/s]
|
779 |
73%|ββββββββ | 8057/11074 [1:08:51<24:49, 2.03it/s]
|
780 |
73%|ββββββββ | 8058/11074 [1:08:51<24:49, 2.02it/s]
|
781 |
73%|ββββββββ | 8059/11074 [1:08:52<24:48, 2.02it/s]
|
782 |
73%|ββββββββ | 8060/11074 [1:08:52<24:48, 2.02it/s]
|
783 |
73%|ββββββββ | 8061/11074 [1:08:53<24:47, 2.03it/s]
|
784 |
73%|ββββββββ | 8062/11074 [1:08:53<24:47, 2.02it/s]
|
785 |
73%|ββββββββ | 8063/11074 [1:08:54<24:47, 2.02it/s]
|
786 |
73%|ββββββββ | 8064/11074 [1:08:54<24:44, 2.03it/s]
|
787 |
73%|ββββββββ | 8065/11074 [1:08:55<24:45, 2.03it/s]
|
788 |
73%|ββββββββ | 8066/11074 [1:08:55<24:43, 2.03it/s]
|
789 |
73%|ββββββββ | 8067/11074 [1:08:56<24:44, 2.03it/s]
|
790 |
73%|ββββββββ | 8068/11074 [1:08:56<24:43, 2.03it/s]
|
791 |
73%|ββββββββ | 8069/11074 [1:08:57<24:43, 2.03it/s]
|
792 |
73%|ββββββββ | 8070/11074 [1:08:57<24:42, 2.03it/s]
|
793 |
73%|ββββββββ | 8071/11074 [1:08:58<24:42, 2.03it/s]
|
794 |
73%|ββββββββ | 8072/11074 [1:08:58<24:44, 2.02it/s]
|
795 |
73%|ββββββββ | 8073/11074 [1:08:59<24:44, 2.02it/s]
|
796 |
73%|ββββββββ | 8074/11074 [1:08:59<24:43, 2.02it/s]
|
797 |
73%|ββββββββ | 8075/11074 [1:09:00<24:43, 2.02it/s]
|
798 |
|
|
|
799 |
73%|ββββββββ | 8075/11074 [1:09:00<24:43, 2.02it/s]
|
800 |
73%|ββββββββ | 8076/11074 [1:09:00<24:43, 2.02it/s]
|
801 |
73%|ββββββββ | 8077/11074 [1:09:01<24:42, 2.02it/s]
|
802 |
73%|ββββββββ | 8078/11074 [1:09:01<24:41, 2.02it/s]
|
803 |
73%|ββββββββ | 8079/11074 [1:09:02<24:38, 2.03it/s]
|
804 |
73%|ββββββββ | 8080/11074 [1:09:02<24:38, 2.02it/s]
|
805 |
73%|ββββββββ | 8081/11074 [1:09:03<24:35, 2.03it/s]
|
806 |
73%|ββββββββ | 8082/11074 [1:09:03<24:37, 2.03it/s]
|
807 |
73%|ββββββββ | 8083/11074 [1:09:04<24:36, 2.03it/s]
|
808 |
73%|ββββββββ | 8084/11074 [1:09:04<24:35, 2.03it/s]
|
809 |
73%|ββββββββ | 8085/11074 [1:09:05<24:34, 2.03it/s]
|
810 |
73%|ββββββββ | 8086/11074 [1:09:05<24:33, 2.03it/s]
|
811 |
73%|ββββββββ | 8087/11074 [1:09:06<24:34, 2.03it/s]
|
812 |
73%|ββββββββ | 8088/11074 [1:09:06<24:31, 2.03it/s]
|
813 |
73%|ββββββββ | 8089/11074 [1:09:07<24:32, 2.03it/s]
|
814 |
73%|ββββββββ | 8090/11074 [1:09:07<24:31, 2.03it/s]
|
815 |
73%|ββββββββ | 8091/11074 [1:09:08<24:32, 2.03it/s]
|
816 |
73%|ββββββββ | 8092/11074 [1:09:08<24:31, 2.03it/s]
|
817 |
73%|ββββββββ | 8093/11074 [1:09:09<24:31, 2.03it/s]
|
818 |
73%|ββββββββ | 8094/11074 [1:09:09<24:31, 2.03it/s]
|
819 |
73%|ββββββββ | 8095/11074 [1:09:10<24:31, 2.02it/s]
|
820 |
73%|ββββββββ | 8096/11074 [1:09:10<24:30, 2.02it/s]
|
821 |
73%|ββββββββ | 8097/11074 [1:09:11<24:31, 2.02it/s]
|
822 |
73%|ββββββββ | 8098/11074 [1:09:11<24:30, 2.02it/s]
|
823 |
73%|ββββββββ | 8099/11074 [1:09:12<24:28, 2.03it/s]
|
824 |
73%|ββββββββ | 8100/11074 [1:09:12<24:29, 2.02it/s]{'loss': 3.2194, 'grad_norm': 0.23723545670509338, 'learning_rate': 0.00020409631196450457, 'epoch': 10.23}
|
|
|
825 |
|
826 |
73%|ββββββββ | 8100/11074 [1:09:12<24:29, 2.02it/s]
|
827 |
73%|ββββββββ | 8101/11074 [1:09:13<24:28, 2.02it/s]
|
828 |
73%|ββββββββ | 8102/11074 [1:09:13<24:28, 2.02it/s]
|
829 |
73%|ββββββββ | 8103/11074 [1:09:14<24:26, 2.03it/s]
|
830 |
73%|ββββββββ | 8104/11074 [1:09:14<24:26, 2.03it/s]
|
831 |
73%|ββββββββ | 8105/11074 [1:09:15<24:24, 2.03it/s]
|
832 |
73%|ββββββββ | 8106/11074 [1:09:15<24:25, 2.03it/s]
|
833 |
73%|ββββββββ | 8107/11074 [1:09:16<24:25, 2.02it/s]
|
834 |
73%|ββββββββ | 8108/11074 [1:09:16<24:24, 2.03it/s]
|
835 |
73%|ββββββββ | 8109/11074 [1:09:16<24:24, 2.02it/s]
|
836 |
73%|ββββββββ | 8110/11074 [1:09:17<24:22, 2.03it/s]
|
837 |
73%|ββββββββ | 8111/11074 [1:09:17<24:23, 2.03it/s]
|
838 |
73%|ββββββββ | 8112/11074 [1:09:18<24:21, 2.03it/s]
|
839 |
73%|ββββββββ | 8113/11074 [1:09:18<24:22, 2.03it/s]
|
840 |
73%|ββββββββ | 8114/11074 [1:09:19<24:20, 2.03it/s]
|
841 |
73%|ββββββββ | 8115/11074 [1:09:19<24:19, 2.03it/s]
|
842 |
73%|ββββββββ | 8116/11074 [1:09:20<24:20, 2.03it/s]
|
843 |
73%|ββββββββ | 8117/11074 [1:09:20<24:17, 2.03it/s]
|
844 |
73%|ββββββββ | 8118/11074 [1:09:21<24:19, 2.03it/s]
|
845 |
73%|ββββββββ | 8119/11074 [1:09:21<24:17, 2.03it/s]
|
846 |
73%|ββββββββ | 8120/11074 [1:09:22<24:17, 2.03it/s]
|
847 |
73%|ββββββββ | 8121/11074 [1:09:22<24:17, 2.03it/s]
|
848 |
73%|ββββββββ | 8122/11074 [1:09:23<24:17, 2.03it/s]
|
849 |
73%|ββββββββ | 8123/11074 [1:09:23<24:17, 2.02it/s]
|
850 |
73%|ββββββββ | 8124/11074 [1:09:24<24:15, 2.03it/s]
|
851 |
73%|ββββββββ | 8125/11074 [1:09:24<24:16, 2.02it/s]
|
852 |
|
|
|
853 |
73%|ββββββββ | 8125/11074 [1:09:24<24:16, 2.02it/s]
|
854 |
73%|ββββββββ | 8126/11074 [1:09:25<24:16, 2.02it/s]
|
855 |
73%|ββββββββ | 8127/11074 [1:09:25<24:18, 2.02it/s]
|
856 |
73%|ββββββββ | 8128/11074 [1:09:26<24:15, 2.02it/s]
|
857 |
73%|ββββββββ | 8129/11074 [1:09:26<24:14, 2.02it/s]
|
858 |
73%|ββββββββ | 8130/11074 [1:09:27<24:13, 2.03it/s]
|
859 |
73%|ββββββββ | 8131/11074 [1:09:27<24:14, 2.02it/s]
|
860 |
73%|ββββββββ | 8132/11074 [1:09:28<24:13, 2.02it/s]
|
861 |
73%|ββββββββ | 8133/11074 [1:09:28<24:14, 2.02it/s]
|
862 |
73%|ββββββββ | 8134/11074 [1:09:29<24:13, 2.02it/s]
|
863 |
73%|ββββββββ | 8135/11074 [1:09:29<24:12, 2.02it/s]
|
864 |
73%|ββββββββ | 8136/11074 [1:09:30<24:11, 2.02it/s]
|
865 |
73%|ββββββββ | 8137/11074 [1:09:30<24:11, 2.02it/s]
|
866 |
73%|ββββββββ | 8138/11074 [1:09:31<24:11, 2.02it/s]
|
867 |
73%|ββββββββ | 8139/11074 [1:09:31<24:10, 2.02it/s]
|
868 |
74%|ββββββββ | 8140/11074 [1:09:32<24:10, 2.02it/s]
|
869 |
74%|ββββββββ | 8141/11074 [1:09:32<24:10, 2.02it/s]
|
870 |
74%|ββββββββ | 8142/11074 [1:09:33<24:08, 2.02it/s]
|
871 |
74%|ββββββββ | 8143/11074 [1:09:33<24:07, 2.02it/s]
|
872 |
74%|ββββββββ | 8144/11074 [1:09:34<24:06, 2.03it/s]
|
873 |
74%|ββββββββ | 8145/11074 [1:09:34<24:06, 2.03it/s]
|
874 |
74%|ββββββββ | 8146/11074 [1:09:35<24:05, 2.03it/s]
|
875 |
74%|ββββββββ | 8147/11074 [1:09:35<24:05, 2.03it/s]
|
876 |
74%|ββββββββ | 8148/11074 [1:09:36<24:05, 2.02it/s]
|
877 |
74%|ββββββββ | 8149/11074 [1:09:36<24:04, 2.02it/s]
|
878 |
74%|ββββββββ | 8150/11074 [1:09:37<24:04, 2.02it/s]
|
879 |
{'loss': 3.2225, 'grad_norm': 0.23790352046489716, 'learning_rate': 0.00019778079782860702, 'epoch': 10.3}
|
|
|
880 |
74%|ββββββββ | 8150/11074 [1:09:37<24:04, 2.02it/s]
|
881 |
74%|ββββββββ | 8151/11074 [1:09:37<24:05, 2.02it/s]
|
882 |
74%|ββββββββ | 8152/11074 [1:09:38<24:05, 2.02it/s]
|
883 |
74%|ββββββββ | 8153/11074 [1:09:38<24:04, 2.02it/s]
|
884 |
74%|ββββββββ | 8154/11074 [1:09:39<24:04, 2.02it/s]
|
885 |
74%|ββββββββ | 8155/11074 [1:09:39<24:03, 2.02it/s]
|
886 |
74%|ββββββββ | 8156/11074 [1:09:40<24:01, 2.02it/s]
|
887 |
74%|ββββββββ | 8157/11074 [1:09:40<24:00, 2.03it/s]
|
888 |
74%|ββββββββ | 8158/11074 [1:09:41<23:59, 2.03it/s]
|
889 |
74%|ββββββββ | 8159/11074 [1:09:41<23:58, 2.03it/s]
|
890 |
74%|ββββββββ | 8160/11074 [1:09:42<23:58, 2.03it/s]
|
891 |
74%|ββββββββ | 8161/11074 [1:09:42<23:56, 2.03it/s]
|
892 |
74%|ββββββββ | 8162/11074 [1:09:43<23:56, 2.03it/s]
|
893 |
74%|ββββββββ | 8163/11074 [1:09:43<23:55, 2.03it/s]
|
894 |
74%|ββββββββ | 8164/11074 [1:09:44<23:55, 2.03it/s]
|
895 |
74%|ββββββββ | 8165/11074 [1:09:44<23:55, 2.03it/s]
|
896 |
74%|ββββββββ | 8166/11074 [1:09:45<23:54, 2.03it/s]
|
897 |
74%|ββββββββ | 8167/11074 [1:09:45<23:55, 2.03it/s]
|
898 |
74%|ββββββββ | 8168/11074 [1:09:46<23:53, 2.03it/s]
|
899 |
74%|ββββββββ | 8169/11074 [1:09:46<23:53, 2.03it/s]
|
900 |
74%|ββββββββ | 8170/11074 [1:09:47<23:51, 2.03it/s]
|
901 |
74%|ββββββββ | 8171/11074 [1:09:47<23:49, 2.03it/s]
|
902 |
74%|ββββββββ | 8172/11074 [1:09:48<23:51, 2.03it/s]
|
903 |
74%|ββββββββ | 8173/11074 [1:09:48<23:49, 2.03it/s]
|
904 |
74%|ββββββββ | 8174/11074 [1:09:49<23:49, 2.03it/s]
|
905 |
74%|ββββββββ | 8175/11074 [1:09:49<23:49, 2.03it/s]
|
906 |
{'loss': 3.2276, 'grad_norm': 0.2349643111228943, 'learning_rate': 0.00019465109757138034, 'epoch': 10.33}
|
|
|
907 |
74%|ββββββββ | 8175/11074 [1:09:49<23:49, 2.03it/s]
|
908 |
74%|ββββββββ | 8176/11074 [1:09:50<23:50, 2.03it/s]
|
909 |
74%|ββββββββ | 8177/11074 [1:09:50<23:51, 2.02it/s]
|
910 |
74%|ββββββββ | 8178/11074 [1:09:51<23:51, 2.02it/s]
|
911 |
74%|ββββββββ | 8179/11074 [1:09:51<23:50, 2.02it/s]
|
912 |
74%|ββββββββ | 8180/11074 [1:09:52<23:48, 2.03it/s]
|
913 |
74%|ββββββββ | 8181/11074 [1:09:52<23:48, 2.02it/s]
|
914 |
74%|ββββββββ | 8182/11074 [1:09:53<23:47, 2.03it/s]
|
915 |
74%|ββββββββ | 8183/11074 [1:09:53<23:47, 2.03it/s]
|
916 |
74%|ββββββββ | 8184/11074 [1:09:54<23:47, 2.02it/s]
|
917 |
74%|ββββββββ | 8185/11074 [1:09:54<23:47, 2.02it/s]
|
918 |
74%|ββββββββ | 8186/11074 [1:09:55<23:46, 2.02it/s]
|
919 |
74%|ββββββββ | 8187/11074 [1:09:55<23:45, 2.03it/s]
|
920 |
74%|ββββββββ | 8188/11074 [1:09:56<23:43, 2.03it/s]
|
921 |
74%|ββββββββ | 8189/11074 [1:09:56<23:45, 2.02it/s]
|
922 |
74%|ββββββββ | 8190/11074 [1:09:56<23:44, 2.03it/s]
|
923 |
74%|ββββββββ | 8191/11074 [1:09:57<23:43, 2.03it/s]
|
924 |
74%|ββββββββ | 8192/11074 [1:09:57<23:43, 2.02it/s]
|
925 |
74%|ββββββββ | 8193/11074 [1:09:58<23:44, 2.02it/s]
|
926 |
74%|ββββββββ | 8194/11074 [1:09:58<23:43, 2.02it/s]
|
927 |
74%|ββββββββ | 8195/11074 [1:09:59<23:41, 2.03it/s]
|
928 |
74%|ββββββββ | 8196/11074 [1:09:59<23:41, 2.02it/s]
|
929 |
74%|ββββββββ | 8197/11074 [1:10:00<23:39, 2.03it/s]
|
930 |
74%|ββββββββ | 8198/11074 [1:10:00<23:39, 2.03it/s]
|
931 |
74%|ββββββββ | 8199/11074 [1:10:01<23:39, 2.03it/s]
|
932 |
74%|ββββββββ | 8200/11074 [1:10:01<23:39, 2.02it/s]
|
933 |
|
|
|
934 |
74%|ββββββββ | 8200/11074 [1:10:01<23:39, 2.02it/s]
|
935 |
74%|ββββββββ | 8201/11074 [1:10:02<23:39, 2.02it/s]
|
936 |
74%|ββββββββ | 8202/11074 [1:10:02<23:39, 2.02it/s]
|
937 |
74%|ββββββββ | 8203/11074 [1:10:03<23:38, 2.02it/s]
|
938 |
74%|ββββββββ | 8204/11074 [1:10:03<23:38, 2.02it/s]
|
939 |
74%|ββββββββ | 8205/11074 [1:10:04<23:37, 2.02it/s]
|
940 |
74%|ββββββββ | 8206/11074 [1:10:04<23:34, 2.03it/s]
|
941 |
74%|ββββββββ | 8207/11074 [1:10:05<23:35, 2.03it/s]
|
942 |
74%|ββββββββ | 8208/11074 [1:10:05<23:32, 2.03it/s]
|
943 |
74%|ββββββββ | 8209/11074 [1:10:06<23:33, 2.03it/s]
|
944 |
74%|ββββββββ | 8210/11074 [1:10:06<23:32, 2.03it/s]
|
945 |
74%|ββββββββ | 8211/11074 [1:10:07<23:32, 2.03it/s]
|
946 |
74%|ββββββββ | 8212/11074 [1:10:07<23:33, 2.03it/s]
|
947 |
74%|ββββββββ | 8213/11074 [1:10:08<23:32, 2.03it/s]
|
948 |
74%|ββββββββ | 8214/11074 [1:10:08<23:32, 2.02it/s]
|
949 |
74%|ββββββββ | 8215/11074 [1:10:09<23:33, 2.02it/s]
|
950 |
74%|ββββββββ | 8216/11074 [1:10:09<23:32, 2.02it/s]
|
951 |
74%|ββββββββ | 8217/11074 [1:10:10<23:31, 2.02it/s]
|
952 |
74%|ββββββββ | 8218/11074 [1:10:10<23:31, 2.02it/s]
|
953 |
74%|ββββββββ | 8219/11074 [1:10:11<23:29, 2.03it/s]
|
954 |
74%|βββββοΏ½οΏ½ββ | 8220/11074 [1:10:11<23:30, 2.02it/s]
|
955 |
74%|ββββββββ | 8221/11074 [1:10:12<23:29, 2.02it/s]
|
956 |
74%|ββββββββ | 8222/11074 [1:10:12<23:28, 2.02it/s]
|
957 |
74%|ββββββββ | 8223/11074 [1:10:13<23:27, 2.03it/s]
|
958 |
74%|ββββββββ | 8224/11074 [1:10:13<23:27, 2.02it/s]
|
959 |
74%|ββββββββ | 8225/11074 [1:10:14<23:27, 2.02it/s]{'loss': 3.2261, 'grad_norm': 0.24224504828453064, 'learning_rate': 0.00018844878253702113, 'epoch': 10.39}
|
960 |
|
|
|
961 |
74%|ββββββββ | 8225/11074 [1:10:14<23:27, 2.02it/s]
|
962 |
74%|ββββββββ | 8226/11074 [1:10:14<23:29, 2.02it/s]
|
963 |
74%|ββββββββ | 8227/11074 [1:10:15<23:26, 2.02it/s]
|
964 |
74%|ββββββββ | 8228/11074 [1:10:15<23:27, 2.02it/s]
|
965 |
74%|ββββββββ | 8229/11074 [1:10:16<23:24, 2.02it/s]
|
966 |
74%|ββββββββ | 8230/11074 [1:10:16<23:25, 2.02it/s]
|
967 |
74%|ββββββββ | 8231/11074 [1:10:17<23:25, 2.02it/s]
|
968 |
74%|ββββββββ | 8232/11074 [1:10:17<23:24, 2.02it/s]
|
969 |
74%|ββββββββ | 8233/11074 [1:10:18<23:23, 2.02it/s]
|
970 |
74%|ββββββββ | 8234/11074 [1:10:18<23:23, 2.02it/s]
|
971 |
74%|ββββββββ | 8235/11074 [1:10:19<23:22, 2.02it/s]
|
972 |
74%|ββββββββ | 8236/11074 [1:10:19<23:21, 2.03it/s]
|
973 |
74%|ββββββββ | 8237/11074 [1:10:20<23:21, 2.02it/s]
|
974 |
74%|ββββββββ | 8238/11074 [1:10:20<23:19, 2.03it/s]
|
975 |
74%|ββββββββ | 8239/11074 [1:10:21<23:20, 2.02it/s]
|
976 |
74%|ββββββββ | 8240/11074 [1:10:21<23:18, 2.03it/s]
|
977 |
74%|ββββββββ | 8241/11074 [1:10:22<23:20, 2.02it/s]
|
978 |
74%|ββββββββ | 8242/11074 [1:10:22<23:18, 2.03it/s]
|
979 |
74%|ββββββββ | 8243/11074 [1:10:23<23:18, 2.02it/s]
|
980 |
74%|ββββββββ | 8244/11074 [1:10:23<23:16, 2.03it/s]
|
981 |
74%|ββββββββ | 8245/11074 [1:10:24<23:15, 2.03it/s]
|
982 |
74%|ββββββββ | 8246/11074 [1:10:24<25:18, 1.86it/s]
|
983 |
74%|ββββββββ | 8247/11074 [1:10:25<24:41, 1.91it/s]
|
984 |
74%|ββββββββ | 8248/11074 [1:10:25<24:15, 1.94it/s]
|
985 |
74%|ββββββββ | 8249/11074 [1:10:26<23:56, 1.97it/s]
|
986 |
74%|ββββββββ | 8250/11074 [1:10:26<23:44, 1.98it/s]{'loss': 3.2294, 'grad_norm': 0.24246717989444733, 'learning_rate': 0.00018537655296280926, 'epoch': 10.42}
|
|
|
987 |
|
988 |
74%|ββββββββ | 8250/11074 [1:10:26<23:44, 1.98it/s]
|
989 |
75%|ββββββββ | 8251/11074 [1:10:27<23:37, 1.99it/s]
|
990 |
75%|ββββββββ | 8252/11074 [1:10:27<23:31, 2.00it/s]
|
991 |
75%|ββββββββ | 8253/11074 [1:10:28<23:23, 2.01it/s]
|
992 |
75%|ββββββββ | 8254/11074 [1:10:28<23:22, 2.01it/s]
|
993 |
75%|ββββββββ | 8255/11074 [1:10:29<23:16, 2.02it/s]
|
994 |
75%|ββββββββ | 8256/11074 [1:10:29<23:14, 2.02it/s]
|
995 |
75%|ββββββββ | 8257/11074 [1:10:30<23:12, 2.02it/s]
|
996 |
75%|ββββββββ | 8258/11074 [1:10:30<23:12, 2.02it/s]
|
997 |
75%|ββββββββ | 8259/11074 [1:10:31<23:12, 2.02it/s]
|
998 |
75%|ββββββββ | 8260/11074 [1:10:31<23:09, 2.02it/s]
|
999 |
75%|ββββββββ | 8261/11074 [1:10:32<23:09, 2.02it/s]
|
1000 |
75%|ββββββββ | 8262/11074 [1:10:32<23:07, 2.03it/s]
|
1001 |
75%|ββββββββ | 8263/11074 [1:10:33<23:08, 2.02it/s]
|
1002 |
75%|ββββββββ | 8264/11074 [1:10:33<23:06, 2.03it/s]
|
1003 |
75%|ββββββββ | 8265/11074 [1:10:34<23:06, 2.03it/s]
|
1004 |
75%|ββββββββ | 8266/11074 [1:10:34<23:07, 2.02it/s]
|
1005 |
75%|ββββββββ | 8267/11074 [1:10:35<23:06, 2.03it/s]
|
1006 |
75%|ββββββββ | 8268/11074 [1:10:35<23:05, 2.03it/s]
|
1007 |
75%|ββββββββ | 8269/11074 [1:10:36<23:04, 2.03it/s]
|
1008 |
75%|ββββββββ | 8270/11074 [1:10:36<23:04, 2.03it/s]
|
1009 |
75%|ββββββββ | 8271/11074 [1:10:37<23:04, 2.03it/s]
|
1010 |
75%|ββββββββ | 8272/11074 [1:10:37<23:03, 2.02it/s]
|
1011 |
75%|ββββββββ | 8273/11074 [1:10:38<23:02, 2.03it/s]
|
1012 |
75%|ββββββββ | 8274/11074 [1:10:38<23:01, 2.03it/s]
|
1013 |
75%|ββββββββ | 8275/11074 [1:10:39<23:00, 2.03it/s]{'loss': 3.2352, 'grad_norm': 0.23462679982185364, 'learning_rate': 0.00018232386349094988, 'epoch': 10.45}
|
|
|
1014 |
|
1015 |
75%|ββββββββ | 8275/11074 [1:10:39<23:00, 2.03it/s]
|
1016 |
75%|ββββββββ | 8276/11074 [1:10:39<23:02, 2.02it/s]
|
1017 |
75%|ββββββββ | 8277/11074 [1:10:40<23:02, 2.02it/s]
|
1018 |
75%|ββββββββ | 8278/11074 [1:10:40<23:01, 2.02it/s]
|
1019 |
75%|ββββββββ | 8279/11074 [1:10:41<23:00, 2.02it/s]
|
1020 |
75%|ββββββββ | 8280/11074 [1:10:41<22:59, 2.02it/s]
|
1021 |
75%|ββββββββ | 8281/11074 [1:10:42<22:58, 2.03it/s]
|
1022 |
75%|ββββββββ | 8282/11074 [1:10:42<22:58, 2.02it/s]
|
1023 |
75%|ββββββββ | 8283/11074 [1:10:43<22:57, 2.03it/s]
|
1024 |
75%|ββββββββ | 8284/11074 [1:10:43<22:56, 2.03it/s]
|
1025 |
75%|ββββββββ | 8285/11074 [1:10:44<22:56, 2.03it/s]
|
1026 |
75%|ββββββββ | 8286/11074 [1:10:44<22:56, 2.02it/s]
|
1027 |
75%|ββββββββ | 8287/11074 [1:10:45<22:56, 2.02it/s]
|
1028 |
75%|ββββββββ | 8288/11074 [1:10:45<22:56, 2.02it/s]
|
1029 |
75%|ββββββββ | 8289/11074 [1:10:46<22:54, 2.03it/s]
|
1030 |
75%|ββββββββ | 8290/11074 [1:10:46<22:54, 2.03it/s]
|
1031 |
75%|ββββββββ | 8291/11074 [1:10:47<22:52, 2.03it/s]
|
1032 |
75%|ββββββββ | 8292/11074 [1:10:47<22:53, 2.03it/s]
|
1033 |
75%|ββββββββ | 8293/11074 [1:10:48<22:51, 2.03it/s]
|
1034 |
75%|ββββββββ | 8294/11074 [1:10:48<22:54, 2.02it/s]
|
1035 |
75%|ββββββββ | 8295/11074 [1:10:48<22:52, 2.02it/s]
|
1036 |
75%|ββββββββ | 8296/11074 [1:10:49<22:51, 2.03it/s]
|
1037 |
75%|ββββββββ | 8297/11074 [1:10:49<22:49, 2.03it/s]
|
1038 |
75%|ββββββββ | 8298/11074 [1:10:50<22:47, 2.03it/s]
|
1039 |
75%|ββββββββ | 8299/11074 [1:10:50<22:49, 2.03it/s]
|
1040 |
75%|ββββββββ | 8300/11074 [1:10:51<22:48, 2.03it/s]
|
1041 |
|
|
|
1042 |
75%|ββββββββ | 8300/11074 [1:10:51<22:48, 2.03it/s]
|
1043 |
75%|ββββββββ | 8301/11074 [1:10:51<22:50, 2.02it/s]
|
1044 |
75%|ββββββββ | 8302/11074 [1:10:52<22:49, 2.02it/s]
|
1045 |
75%|ββββββββ | 8303/11074 [1:10:52<22:50, 2.02it/s]
|
1046 |
75%|ββββββββ | 8304/11074 [1:10:53<22:49, 2.02it/s]
|
1047 |
75%|ββββββββ | 8305/11074 [1:10:53<22:48, 2.02it/s]
|
1048 |
75%|ββββββββ | 8306/11074 [1:10:54<22:47, 2.02it/s]
|
1049 |
75%|ββββββββ | 8307/11074 [1:10:54<22:48, 2.02it/s]
|
1050 |
75%|ββββββββ | 8308/11074 [1:10:55<22:46, 2.02it/s]
|
1051 |
75%|ββββββββ | 8309/11074 [1:10:55<22:46, 2.02it/s]
|
1052 |
75%|ββββββββ | 8310/11074 [1:10:56<22:44, 2.03it/s]
|
1053 |
75%|ββββββββ | 8311/11074 [1:10:56<22:44, 2.03it/s]
|
1054 |
75%|ββββββββ | 8312/11074 [1:10:57<22:43, 2.03it/s]
|
1055 |
75%|ββββββββ | 8313/11074 [1:10:57<22:43, 2.02it/s]
|
1056 |
75%|ββββββββ | 8314/11074 [1:10:58<22:43, 2.02it/s]
|
1057 |
75%|ββββββββ | 8315/11074 [1:10:58<22:42, 2.02it/s]
|
1058 |
75%|ββββββββ | 8316/11074 [1:10:59<22:41, 2.03it/s]
|
1059 |
75%|ββββββββ | 8317/11074 [1:10:59<22:41, 2.03it/s]
|
1060 |
75%|ββββββββ | 8318/11074 [1:11:00<22:39, 2.03it/s]
|
1061 |
75%|ββββββββ | 8319/11074 [1:11:00<22:39, 2.03it/s]
|
1062 |
75%|ββββββββ | 8320/11074 [1:11:01<22:39, 2.03it/s]
|
1063 |
75%|ββββββββ | 8321/11074 [1:11:01<22:37, 2.03it/s]
|
1064 |
75%|ββββββββ | 8322/11074 [1:11:02<22:38, 2.03it/s]
|
1065 |
75%|ββββββββ | 8323/11074 [1:11:02<22:36, 2.03it/s]
|
1066 |
75%|ββββββββ | 8324/11074 [1:11:03<22:36, 2.03it/s]
|
1067 |
75%|ββββββββ | 8325/11074 [1:11:03<22:35, 2.03it/s]
|
1068 |
{'loss': 3.244, 'grad_norm': 0.24260075390338898, 'learning_rate': 0.0001762778619941043, 'epoch': 10.52}
|
|
|
1069 |
75%|ββββββββ | 8325/11074 [1:11:03<22:35, 2.03it/s]
|
1070 |
75%|ββββββββ | 8326/11074 [1:11:04<24:35, 1.86it/s]
|
1071 |
75%|ββββββββ | 8327/11074 [1:11:04<23:58, 1.91it/s]
|
1072 |
75%|ββββββββ | 8328/11074 [1:11:05<23:32, 1.94it/s]
|
1073 |
75%|ββββββββ | 8329/11074 [1:11:05<23:14, 1.97it/s]
|
1074 |
75%|ββββββββ | 8330/11074 [1:11:06<23:02, 1.99it/s]
|
1075 |
75%|ββββββββ | 8331/11074 [1:11:06<22:53, 2.00it/s]
|
1076 |
75%|ββββββββ | 8332/11074 [1:11:07<22:47, 2.00it/s]
|
1077 |
75%|ββββββββ | 8333/11074 [1:11:07<22:42, 2.01it/s]
|
1078 |
75%|ββββββββ | 8334/11074 [1:11:08<22:37, 2.02it/s]
|
1079 |
75%|ββββββββ | 8335/11074 [1:11:08<22:36, 2.02it/s]
|
1080 |
75%|ββββββββ | 8336/11074 [1:11:09<22:35, 2.02it/s]
|
1081 |
75%|ββββββββ | 8337/11074 [1:11:09<22:34, 2.02it/s]
|
1082 |
75%|ββββββββ | 8338/11074 [1:11:10<22:32, 2.02it/s]
|
1083 |
75%|ββββββββ | 8339/11074 [1:11:10<22:32, 2.02it/s]
|
1084 |
75%|ββββββββ | 8340/11074 [1:11:11<22:31, 2.02it/s]
|
1085 |
75%|ββββββββ | 8341/11074 [1:11:11<22:31, 2.02it/s]
|
1086 |
75%|ββββββββ | 8342/11074 [1:11:12<22:29, 2.02it/s]
|
1087 |
75%|ββββββββ | 8343/11074 [1:11:12<22:29, 2.02it/s]
|
1088 |
75%|ββββββββ | 8344/11074 [1:11:13<22:27, 2.03it/s]
|
1089 |
75%|ββββββββ | 8345/11074 [1:11:13<22:26, 2.03it/s]
|
1090 |
75%|ββββββββ | 8346/11074 [1:11:14<22:27, 2.03it/s]
|
1091 |
75%|ββββββββ | 8347/11074 [1:11:14<22:24, 2.03it/s]
|
1092 |
75%|ββββββββ | 8348/11074 [1:11:15<22:25, 2.03it/s]
|
1093 |
75%|ββββββββ | 8349/11074 [1:11:15<22:24, 2.03it/s]
|
1094 |
75%|ββββββββ | 8350/11074 [1:11:16<22:24, 2.03it/s]{'loss': 3.2382, 'grad_norm': 0.23930463194847107, 'learning_rate': 0.0001732849254639809, 'epoch': 10.55}
|
1095 |
|
|
|
1096 |
75%|ββββββββ | 8350/11074 [1:11:16<22:24, 2.03it/s]
|
1097 |
75%|ββββββββ | 8351/11074 [1:11:16<22:25, 2.02it/s]
|
1098 |
75%|ββββββββ | 8352/11074 [1:11:17<22:26, 2.02it/s]
|
1099 |
75%|ββββββββ | 8353/11074 [1:11:17<22:25, 2.02it/s]
|
1100 |
75%|ββββββββ | 8354/11074 [1:11:18<22:24, 2.02it/s]
|
1101 |
75%|ββββββββ | 8355/11074 [1:11:18<22:22, 2.03it/s]
|
1102 |
75%|ββββββββ | 8356/11074 [1:11:19<22:22, 2.02it/s]
|
1103 |
75%|ββββββββ | 8357/11074 [1:11:19<22:21, 2.03it/s]
|
1104 |
75%|ββββββββ | 8358/11074 [1:11:20<22:19, 2.03it/s]
|
1105 |
75%|ββββββββ | 8359/11074 [1:11:20<22:19, 2.03it/s]
|
1106 |
75%|ββββββββ | 8360/11074 [1:11:21<22:17, 2.03it/s]
|
1107 |
76%|ββββββββ | 8361/11074 [1:11:21<22:17, 2.03it/s]
|
1108 |
76%|ββββββββ | 8362/11074 [1:11:22<22:17, 2.03it/s]
|
1109 |
76%|ββββββββ | 8363/11074 [1:11:22<22:18, 2.03it/s]
|
1110 |
76%|ββββββββ | 8364/11074 [1:11:23<22:17, 2.03it/s]
|
1111 |
76%|ββββββββ | 8365/11074 [1:11:23<22:18, 2.02it/s]
|
1112 |
76%|ββββββββ | 8366/11074 [1:11:24<22:15, 2.03it/s]
|
1113 |
76%|ββββββββ | 8367/11074 [1:11:24<22:16, 2.03it/s]
|
1114 |
76%|ββββββββ | 8368/11074 [1:11:25<22:16, 2.02it/s]
|
1115 |
76%|ββββββββ | 8369/11074 [1:11:25<22:16, 2.02it/s]
|
1116 |
76%|ββββββββ | 8370/11074 [1:11:26<22:16, 2.02it/s]
|
1117 |
76%|ββββββββ | 8371/11074 [1:11:26<22:15, 2.02it/s]
|
1118 |
76%|ββββββββ | 8372/11074 [1:11:27<22:15, 2.02it/s]
|
1119 |
76%|ββββββββ | 8373/11074 [1:11:27<22:14, 2.02it/s]
|
1120 |
76%|ββββββββ | 8374/11074 [1:11:28<22:15, 2.02it/s]
|
1121 |
76%|ββββββββ | 8375/11074 [1:11:28<22:13, 2.02it/s]{'loss': 3.2304, 'grad_norm': 0.23772986233234406, 'learning_rate': 0.000170312280002615, 'epoch': 10.58}
|
1122 |
|
|
|
1123 |
76%|ββββββββ | 8375/11074 [1:11:28<22:13, 2.02it/s]
|
1124 |
76%|ββββββββ | 8376/11074 [1:11:29<22:15, 2.02it/s]
|
1125 |
76%|ββββββββ | 8377/11074 [1:11:29<22:13, 2.02it/s]
|
1126 |
76%|ββββββββ | 8378/11074 [1:11:30<22:13, 2.02it/s]
|
1127 |
76%|ββββββββ | 8379/11074 [1:11:30<22:11, 2.02it/s]
|
1128 |
76%|ββββββββ | 8380/11074 [1:11:31<22:12, 2.02it/s]
|
1129 |
76%|ββββββββ | 8381/11074 [1:11:31<22:11, 2.02it/s]
|
1130 |
76%|ββββββββ | 8382/11074 [1:11:32<22:11, 2.02it/s]
|
1131 |
76%|ββββββββ | 8383/11074 [1:11:32<22:09, 2.02it/s]
|
1132 |
76%|ββββββββ | 8384/11074 [1:11:33<22:09, 2.02it/s]
|
1133 |
76%|ββββββββ | 8385/11074 [1:11:33<22:07, 2.03it/s]
|
1134 |
76%|ββββββββ | 8386/11074 [1:11:34<22:07, 2.02it/s]
|
1135 |
76%|ββββββββ | 8387/11074 [1:11:34<22:07, 2.02it/s]
|
1136 |
76%|ββββββββ | 8388/11074 [1:11:35<22:06, 2.02it/s]
|
1137 |
76%|ββββββββ | 8389/11074 [1:11:35<22:06, 2.02it/s]
|
1138 |
76%|ββββββββ | 8390/11074 [1:11:36<22:04, 2.03it/s]
|
1139 |
76%|ββββββββ | 8391/11074 [1:11:36<22:05, 2.02it/s]
|
1140 |
76%|ββββββββ | 8392/11074 [1:11:37<22:03, 2.03it/s]
|
1141 |
76%|ββββββββ | 8393/11074 [1:11:37<22:04, 2.02it/s]
|
1142 |
76%|ββββββββ | 8394/11074 [1:11:38<22:02, 2.03it/s]
|
1143 |
76%|ββββββββ | 8395/11074 [1:11:38<22:02, 2.03it/s]
|
1144 |
76%|ββββββββ | 8396/11074 [1:11:39<22:01, 2.03it/s]
|
1145 |
76%|ββββββββ | 8397/11074 [1:11:39<22:00, 2.03it/s]
|
1146 |
76%|ββββββββ | 8398/11074 [1:11:39<22:01, 2.03it/s]
|
1147 |
76%|ββββββββ | 8399/11074 [1:11:40<22:00, 2.03it/s]
|
1148 |
76%|ββββββββ | 8400/11074 [1:11:40<22:00, 2.02it/s]{'loss': 3.2414, 'grad_norm': 0.23746074736118317, 'learning_rate': 0.00016736011023005699, 'epoch': 10.61}
|
|
|
1149 |
|
1150 |
76%|ββββββββ | 8400/11074 [1:11:40<22:00, 2.02it/s]
|
1151 |
76%|ββββββββ | 8401/11074 [1:11:41<22:01, 2.02it/s]
|
1152 |
76%|ββββββββ | 8402/11074 [1:11:41<22:02, 2.02it/s]
|
1153 |
76%|ββββββββ | 8403/11074 [1:11:42<22:00, 2.02it/s]
|
1154 |
76%|ββββββββ | 8404/11074 [1:11:42<22:01, 2.02it/s]
|
1155 |
76%|ββββββββ | 8405/11074 [1:11:43<21:58, 2.02it/s]
|
1156 |
76%|ββββββββ | 8406/11074 [1:11:43<21:59, 2.02it/s]
|
1157 |
76%|ββββββββ | 8407/11074 [1:11:44<21:57, 2.02it/s]
|
1158 |
76%|ββββββββ | 8408/11074 [1:11:44<21:57, 2.02it/s]
|
1159 |
76%|ββββββββ | 8409/11074 [1:11:45<21:57, 2.02it/s]
|
1160 |
76%|βοΏ½οΏ½οΏ½ββββββ | 8410/11074 [1:11:45<21:57, 2.02it/s]
|
1161 |
76%|ββββββββ | 8411/11074 [1:11:46<21:57, 2.02it/s]
|
1162 |
76%|ββββββββ | 8412/11074 [1:11:46<21:56, 2.02it/s]
|
1163 |
76%|ββββββββ | 8413/11074 [1:11:47<21:56, 2.02it/s]
|
1164 |
76%|ββββββββ | 8414/11074 [1:11:47<21:55, 2.02it/s]
|
1165 |
76%|ββββββββ | 8415/11074 [1:11:48<21:55, 2.02it/s]
|
1166 |
76%|ββββββββ | 8416/11074 [1:11:48<21:54, 2.02it/s]
|
1167 |
76%|ββββββββ | 8417/11074 [1:11:49<21:54, 2.02it/s]
|
1168 |
76%|ββββββββ | 8418/11074 [1:11:49<21:53, 2.02it/s]
|
1169 |
76%|ββββββββ | 8419/11074 [1:11:50<21:54, 2.02it/s]
|
1170 |
76%|ββββββββ | 8420/11074 [1:11:50<21:53, 2.02it/s]
|
1171 |
76%|ββββββββ | 8421/11074 [1:11:51<21:51, 2.02it/s]
|
1172 |
76%|ββββββββ | 8422/11074 [1:11:51<21:51, 2.02it/s]
|
1173 |
76%|ββββββββ | 8423/11074 [1:11:52<21:51, 2.02it/s]
|
1174 |
76%|ββββββββ | 8424/11074 [1:11:52<21:52, 2.02it/s]
|
1175 |
76%|ββββββββ | 8425/11074 [1:11:53<21:50, 2.02it/s]{'loss': 3.2383, 'grad_norm': 0.23675471544265747, 'learning_rate': 0.0001644285994946871, 'epoch': 10.64}
|
1176 |
|
|
|
1177 |
76%|ββββββββ | 8425/11074 [1:11:53<21:50, 2.02it/s]
|
1178 |
76%|ββββββββ | 8426/11074 [1:11:53<21:56, 2.01it/s]
|
1179 |
76%|ββββββββ | 8427/11074 [1:11:54<21:52, 2.02it/s]
|
1180 |
76%|ββββββββ | 8428/11074 [1:11:54<21:50, 2.02it/s]
|
1181 |
76%|ββββββββ | 8429/11074 [1:11:55<21:47, 2.02it/s]
|
1182 |
76%|ββββββββ | 8430/11074 [1:11:55<21:48, 2.02it/s]
|
1183 |
76%|ββββββββ | 8431/11074 [1:11:56<21:46, 2.02it/s]
|
1184 |
76%|ββββββββ | 8432/11074 [1:11:56<21:46, 2.02it/s]
|
1185 |
76%|ββββββββ | 8433/11074 [1:11:57<21:45, 2.02it/s]
|
1186 |
76%|ββββββββ | 8434/11074 [1:11:57<21:43, 2.03it/s]
|
1187 |
76%|ββββββββ | 8435/11074 [1:11:58<21:43, 2.02it/s]
|
1188 |
76%|ββββββββ | 8436/11074 [1:11:58<21:41, 2.03it/s]
|
1189 |
76%|ββββββββ | 8437/11074 [1:11:59<21:41, 2.03it/s]
|
1190 |
76%|ββββββββ | 8438/11074 [1:11:59<21:41, 2.03it/s]
|
1191 |
76%|ββββββββ | 8439/11074 [1:12:00<21:40, 2.03it/s]
|
1192 |
76%|ββββββββ | 8440/11074 [1:12:00<21:40, 2.02it/s]
|
1193 |
76%|ββββββββ | 8441/11074 [1:12:01<21:38, 2.03it/s]
|
1194 |
76%|ββββββββ | 8442/11074 [1:12:01<21:39, 2.03it/s]
|
1195 |
76%|ββββββββ | 8443/11074 [1:12:02<21:37, 2.03it/s]
|
1196 |
76%|ββββββββ | 8444/11074 [1:12:02<21:37, 2.03it/s]
|
1197 |
76%|ββββββββ | 8445/11074 [1:12:03<21:37, 2.03it/s]
|
1198 |
76%|ββββββββ | 8446/11074 [1:12:03<21:38, 2.02it/s]
|
1199 |
76%|ββββββββ | 8447/11074 [1:12:04<21:38, 2.02it/s]
|
1200 |
76%|ββββββββ | 8448/11074 [1:12:04<21:38, 2.02it/s]
|
1201 |
76%|ββββββββ | 8449/11074 [1:12:05<21:36, 2.02it/s]
|
1202 |
76%|ββββββββ | 8450/11074 [1:12:05<21:34, 2.03it/s]
|
1203 |
|
|
|
1204 |
76%|ββββββββ | 8450/11074 [1:12:05<21:34, 2.03it/s]
|
1205 |
76%|ββββββββ | 8451/11074 [1:12:06<21:37, 2.02it/s]
|
1206 |
76%|ββββββββ | 8452/11074 [1:12:06<21:34, 2.03it/s]
|
1207 |
76%|ββββββββ | 8453/11074 [1:12:07<21:34, 2.02it/s]
|
1208 |
76%|ββββββββ | 8454/11074 [1:12:07<21:33, 2.03it/s]
|
1209 |
76%|ββββββββ | 8455/11074 [1:12:08<21:33, 2.02it/s]
|
1210 |
76%|ββββββββ | 8456/11074 [1:12:08<21:33, 2.02it/s]
|
1211 |
76%|ββββββββ | 8457/11074 [1:12:09<21:34, 2.02it/s]
|
1212 |
76%|ββββββββ | 8458/11074 [1:12:09<21:33, 2.02it/s]
|
1213 |
76%|ββββββββ | 8459/11074 [1:12:10<21:32, 2.02it/s]
|
1214 |
76%|ββββββββ | 8460/11074 [1:12:10<21:30, 2.03it/s]
|
1215 |
76%|ββββββββ | 8461/11074 [1:12:11<21:31, 2.02it/s]
|
1216 |
76%|ββββββββ | 8462/11074 [1:12:11<21:29, 2.02it/s]
|
1217 |
76%|ββββββββ | 8463/11074 [1:12:12<21:30, 2.02it/s]
|
1218 |
76%|ββββββββ | 8464/11074 [1:12:12<21:29, 2.02it/s]
|
1219 |
76%|ββββββββ | 8465/11074 [1:12:13<21:28, 2.02it/s]
|
1220 |
76%|ββββββββ | 8466/11074 [1:12:13<21:28, 2.02it/s]
|
1221 |
76%|ββββββββ | 8467/11074 [1:12:14<21:27, 2.03it/s]
|
1222 |
76%|ββββββββ | 8468/11074 [1:12:14<21:27, 2.02it/s]
|
1223 |
76%|ββββββββ | 8469/11074 [1:12:15<21:26, 2.03it/s]
|
1224 |
76%|ββββββββ | 8470/11074 [1:12:15<21:26, 2.02it/s]
|
1225 |
76%|ββββββββ | 8471/11074 [1:12:16<21:24, 2.03it/s]
|
1226 |
77%|ββββββββ | 8472/11074 [1:12:16<21:24, 2.03it/s]
|
1227 |
77%|ββββββββ | 8473/11074 [1:12:17<21:24, 2.03it/s]
|
1228 |
77%|ββββββββ | 8474/11074 [1:12:17<21:23, 2.02it/s]
|
1229 |
77%|ββββββββ | 8475/11074 [1:12:18<21:23, 2.03it/s]{'loss': 3.2461, 'grad_norm': 0.23413671553134918, 'learning_rate': 0.00015862828210244434, 'epoch': 10.71}
|
1230 |
|
|
|
1231 |
77%|ββββββββ | 8475/11074 [1:12:18<21:23, 2.03it/s]
|
1232 |
77%|ββββββββ | 8476/11074 [1:12:18<21:25, 2.02it/s]
|
1233 |
77%|ββββββββ | 8477/11074 [1:12:19<21:23, 2.02it/s]
|
1234 |
77%|ββββββββ | 8478/11074 [1:12:19<21:23, 2.02it/s]
|
1235 |
77%|ββββββββ | 8479/11074 [1:12:20<21:22, 2.02it/s]
|
1236 |
77%|ββββββββ | 8480/11074 [1:12:20<21:21, 2.02it/s]
|
1237 |
77%|ββββββββ | 8481/11074 [1:12:21<21:21, 2.02it/s]
|
1238 |
77%|ββββββββ | 8482/11074 [1:12:21<21:20, 2.02it/s]
|
1239 |
77%|ββββββββ | 8483/11074 [1:12:22<21:20, 2.02it/s]
|
1240 |
77%|ββββββββ | 8484/11074 [1:12:22<21:19, 2.02it/s]
|
1241 |
77%|ββββββββ | 8485/11074 [1:12:22<21:20, 2.02it/s]
|
1242 |
77%|ββββββββ | 8486/11074 [1:12:23<21:19, 2.02it/s]
|
1243 |
77%|ββββββββ | 8487/11074 [1:12:23<21:18, 2.02it/s]
|
1244 |
77%|ββββββββ | 8488/11074 [1:12:24<21:17, 2.03it/s]
|
1245 |
77%|ββββββββ | 8489/11074 [1:12:24<21:16, 2.02it/s]
|
1246 |
77%|ββββββββ | 8490/11074 [1:12:25<21:15, 2.03it/s]
|
1247 |
77%|ββββββββ | 8491/11074 [1:12:25<21:16, 2.02it/s]
|
1248 |
77%|ββββββββ | 8492/11074 [1:12:26<21:14, 2.03it/s]
|
1249 |
77%|ββββββββ | 8493/11074 [1:12:26<21:14, 2.02it/s]
|
1250 |
77%|ββββββββ | 8494/11074 [1:12:27<21:13, 2.03it/s]
|
1251 |
77%|ββββββββ | 8495/11074 [1:12:27<21:14, 2.02it/s]
|
1252 |
77%|ββββββββ | 8496/11074 [1:12:28<21:13, 2.02it/s]
|
1253 |
77%|ββββββββ | 8497/11074 [1:12:28<21:12, 2.03it/s]
|
1254 |
77%|ββββββββ | 8498/11074 [1:12:29<21:12, 2.02it/s]
|
1255 |
77%|ββββββββ | 8499/11074 [1:12:29<21:12, 2.02it/s]
|
1256 |
77%|ββββββββ | 8500/11074 [1:12:30<21:12, 2.02it/s]
|
1257 |
{'loss': 3.2367, 'grad_norm': 0.23599669337272644, 'learning_rate': 0.0001557598356819, 'epoch': 10.74}
|
|
|
1258 |
77%|ββββββββ | 8500/11074 [1:12:30<21:12, 2.02it/s]
|
1259 |
77%|ββββββββ | 8501/11074 [1:12:30<21:14, 2.02it/s]
|
1260 |
77%|ββββββββ | 8502/11074 [1:12:31<21:12, 2.02it/s]
|
1261 |
77%|ββββββββ | 8503/11074 [1:12:31<21:12, 2.02it/s]
|
1262 |
77%|ββββββββ | 8504/11074 [1:12:32<21:10, 2.02it/s]
|
1263 |
77%|ββββββββ | 8505/11074 [1:12:32<21:09, 2.02it/s]
|
1264 |
77%|ββββββββ | 8506/11074 [1:12:33<21:09, 2.02it/s]
|
1265 |
77%|ββββββββ | 8507/11074 [1:12:33<21:07, 2.02it/s]
|
1266 |
77%|ββββββββ | 8508/11074 [1:12:34<21:08, 2.02it/s]
|
1267 |
77%|ββββββββ | 8509/11074 [1:12:34<21:08, 2.02it/s]
|
1268 |
77%|ββββββββ | 8510/11074 [1:12:35<21:07, 2.02it/s]
|
1269 |
77%|ββββββββ | 8511/11074 [1:12:35<21:06, 2.02it/s]
|
1270 |
77%|ββββββββ | 8512/11074 [1:12:36<21:06, 2.02it/s]
|
1271 |
77%|ββββββββ | 8513/11074 [1:12:36<21:03, 2.03it/s]
|
1272 |
77%|ββββββββ | 8514/11074 [1:12:37<21:03, 2.03it/s]
|
1273 |
77%|ββββββββ | 8515/11074 [1:12:37<21:03, 2.03it/s]
|
1274 |
77%|ββββββββ | 8516/11074 [1:12:38<21:03, 2.02it/s]
|
1275 |
77%|ββββββββ | 8517/11074 [1:12:38<21:02, 2.03it/s]
|
1276 |
77%|ββββββββ | 8518/11074 [1:12:39<21:01, 2.03it/s]
|
1277 |
77%|ββββββββ | 8519/11074 [1:12:39<21:01, 2.03it/s]
|
1278 |
77%|ββββββββ | 8520/11074 [1:12:40<20:59, 2.03it/s]
|
1279 |
77%|ββββββββ | 8521/11074 [1:12:40<21:00, 2.03it/s]
|
1280 |
77%|ββββββββ | 8522/11074 [1:12:41<20:58, 2.03it/s]
|
1281 |
77%|ββββββββ | 8523/11074 [1:12:41<20:59, 2.03it/s]
|
1282 |
77%|ββββββββ | 8524/11074 [1:12:42<20:58, 2.03it/s]
|
1283 |
77%|ββββββββ | 8525/11074 [1:12:42<20:58, 2.03it/s]{'loss': 3.2446, 'grad_norm': 0.2339746206998825, 'learning_rate': 0.00015291276874882887, 'epoch': 10.77}
|
|
|
1284 |
|
1285 |
77%|ββββββββ | 8525/11074 [1:12:42<20:58, 2.03it/s]
|
1286 |
77%|ββββββββ | 8526/11074 [1:12:43<20:59, 2.02it/s]
|
1287 |
77%|ββββββββ | 8527/11074 [1:12:43<20:59, 2.02it/s]
|
1288 |
77%|ββββββββ | 8528/11074 [1:12:44<20:58, 2.02it/s]
|
1289 |
77%|ββββββββ | 8529/11074 [1:12:44<20:57, 2.02it/s]
|
1290 |
77%|ββββββββ | 8530/11074 [1:12:45<20:57, 2.02it/s]
|
1291 |
77%|ββββββββ | 8531/11074 [1:12:45<20:55, 2.03it/s]
|
1292 |
77%|ββββββββ | 8532/11074 [1:12:46<20:55, 2.02it/s]
|
1293 |
77%|ββββββββ | 8533/11074 [1:12:46<20:54, 2.03it/s]
|
1294 |
77%|ββββββββ | 8534/11074 [1:12:47<20:54, 2.02it/s]
|
1295 |
77%|ββββββββ | 8535/11074 [1:12:47<20:53, 2.03it/s]
|
1296 |
77%|ββββββββ | 8536/11074 [1:12:48<20:53, 2.02it/s]
|
1297 |
77%|ββββββββ | 8537/11074 [1:12:48<20:54, 2.02it/s]
|
1298 |
77%|ββββββββ | 8538/11074 [1:12:49<20:53, 2.02it/s]
|
1299 |
77%|ββββββββ | 8539/11074 [1:12:49<20:54, 2.02it/s]
|
1300 |
77%|ββββββββ | 8540/11074 [1:12:50<20:55, 2.02it/s]
|
1301 |
77%|ββββββββ | 8541/11074 [1:12:50<20:54, 2.02it/s]
|
1302 |
77%|ββββββββ | 8542/11074 [1:12:51<20:52, 2.02it/s]
|
1303 |
77%|ββββββββ | 8543/11074 [1:12:51<20:52, 2.02it/s]
|
1304 |
77%|ββββββββ | 8544/11074 [1:12:52<20:51, 2.02it/s]
|
1305 |
77%|ββββββββ | 8545/11074 [1:12:52<20:51, 2.02it/s]
|
1306 |
77%|ββββββββ | 8546/11074 [1:12:53<20:50, 2.02it/s]
|
1307 |
77%|ββββββββ | 8547/11074 [1:12:53<20:48, 2.02it/s]
|
1308 |
77%|ββββββββ | 8548/11074 [1:12:54<20:48, 2.02it/s]
|
1309 |
77%|ββββββββ | 8549/11074 [1:12:54<20:47, 2.02it/s]
|
1310 |
77%|ββββββββ | 8550/11074 [1:12:55<20:47, 2.02it/s]{'loss': 3.24, 'grad_norm': 0.24058009684085846, 'learning_rate': 0.00015008725812406143, 'epoch': 10.8}
|
|
|
1311 |
|
1312 |
77%|ββββββββ | 8550/11074 [1:12:55<20:47, 2.02it/s]
|
1313 |
77%|ββββββββ | 8551/11074 [1:12:55<20:48, 2.02it/s]
|
1314 |
77%|ββββββββ | 8552/11074 [1:12:56<20:48, 2.02it/s]
|
1315 |
77%|ββββββββ | 8553/11074 [1:12:56<20:47, 2.02it/s]
|
1316 |
77%|ββββββββ | 8554/11074 [1:12:57<20:46, 2.02it/s]
|
1317 |
77%|ββββββββ | 8555/11074 [1:12:57<20:46, 2.02it/s]
|
1318 |
77%|ββββββββ | 8556/11074 [1:12:58<20:44, 2.02it/s]
|
1319 |
77%|ββββββββ | 8557/11074 [1:12:58<20:43, 2.02it/s]
|
1320 |
77%|ββββββββ | 8558/11074 [1:12:59<20:42, 2.03it/s]
|
1321 |
77%|ββββββββ | 8559/11074 [1:12:59<20:41, 2.03it/s]
|
1322 |
77%|ββββββββ | 8560/11074 [1:13:00<20:41, 2.03it/s]
|
1323 |
77%|ββββββββ | 8561/11074 [1:13:00<20:41, 2.02it/s]
|
1324 |
77%|ββββββββ | 8562/11074 [1:13:01<20:41, 2.02it/s]
|
1325 |
77%|ββββββββ | 8563/11074 [1:13:01<20:40, 2.02it/s]
|
1326 |
77%|ββββββββ | 8564/11074 [1:13:02<20:40, 2.02it/s]
|
1327 |
77%|ββββββββ | 8565/11074 [1:13:02<20:38, 2.03it/s]
|
1328 |
77%|ββββββββ | 8566/11074 [1:13:03<20:39, 2.02it/s]
|
1329 |
77%|ββββββββ | 8567/11074 [1:13:03<20:36, 2.03it/s]
|
1330 |
77%|ββββββββ | 8568/11074 [1:13:04<20:37, 2.03it/s]
|
1331 |
77%|ββββββββ | 8569/11074 [1:13:04<20:36, 2.03it/s]
|
1332 |
77%|ββββββββ | 8570/11074 [1:13:04<20:36, 2.02it/s]
|
1333 |
77%|ββββββββ | 8571/11074 [1:13:05<20:36, 2.02it/s]
|
1334 |
77%|ββββββββ | 8572/11074 [1:13:05<20:34, 2.03it/s]
|
1335 |
77%|ββββββββ | 8573/11074 [1:13:06<20:36, 2.02it/s]
|
1336 |
77%|ββββββββ | 8574/11074 [1:13:06<20:33, 2.03it/s]
|
1337 |
77%|ββββββββ | 8575/11074 [1:13:07<20:34, 2.02it/s]{'loss': 3.2475, 'grad_norm': 0.23284657299518585, 'learning_rate': 0.00014728347928964547, 'epoch': 10.83}
|
|
|
1338 |
|
1339 |
77%|ββββββββ | 8575/11074 [1:13:07<20:34, 2.02it/s]
|
1340 |
77%|ββββββββ | 8576/11074 [1:13:07<20:34, 2.02it/s]
|
1341 |
77%|ββββββββ | 8577/11074 [1:13:08<20:33, 2.02it/s]
|
1342 |
77%|ββββββββ | 8578/11074 [1:13:08<20:33, 2.02it/s]
|
1343 |
77%|ββββββββ | 8579/11074 [1:13:09<20:33, 2.02it/s]
|
1344 |
77%|ββββββββ | 8580/11074 [1:13:09<20:32, 2.02it/s]
|
1345 |
77%|ββββββββ | 8581/11074 [1:13:10<20:31, 2.02it/s]
|
1346 |
77%|ββββββββ | 8582/11074 [1:13:10<20:31, 2.02it/s]
|
1347 |
78%|ββββββββ | 8583/11074 [1:13:11<20:29, 2.03it/s]
|
1348 |
78%|ββββββββ | 8584/11074 [1:13:11<20:29, 2.02it/s]
|
1349 |
78%|ββββββββ | 8585/11074 [1:13:12<20:29, 2.02it/s]
|
1350 |
78%|ββββββββ | 8586/11074 [1:13:12<20:30, 2.02it/s]
|
1351 |
78%|ββββββββ | 8587/11074 [1:13:13<20:28, 2.02it/s]
|
1352 |
78%|ββββββββ | 8588/11074 [1:13:13<20:30, 2.02it/s]
|
1353 |
78%|ββββββββ | 8589/11074 [1:13:14<20:37, 2.01it/s]
|
1354 |
78%|ββββββββ | 8590/11074 [1:13:14<20:33, 2.01it/s]
|
1355 |
78%|ββββββββ | 8591/11074 [1:13:15<20:30, 2.02it/s]
|
1356 |
78%|ββββββββ | 8592/11074 [1:13:15<20:29, 2.02it/s]
|
1357 |
78%|ββββββββ | 8593/11074 [1:13:16<20:28, 2.02it/s]
|
1358 |
78%|ββββββββ | 8594/11074 [1:13:16<20:27, 2.02it/s]
|
1359 |
78%|ββββββββ | 8595/11074 [1:13:17<20:26, 2.02it/s]
|
1360 |
78%|ββββββββ | 8596/11074 [1:13:17<20:26, 2.02it/s]
|
1361 |
78%|ββββββββ | 8597/11074 [1:13:18<20:25, 2.02it/s]
|
1362 |
78%|ββββββββ | 8598/11074 [1:13:18<20:25, 2.02it/s]
|
1363 |
78%|ββββββββ | 8599/11074 [1:13:19<20:24, 2.02it/s]
|
1364 |
78%|ββββββββ | 8600/11074 [1:13:19<20:23, 2.02it/s]{'loss': 3.2479, 'grad_norm': 0.2400185614824295, 'learning_rate': 0.00014450160637794786, 'epoch': 10.87}
|
|
|
1365 |
|
1366 |
78%|ββββββββ | 8600/11074 [1:13:19<20:23, 2.02it/s]
|
1367 |
78%|ββββββββ | 8601/11074 [1:13:20<20:24, 2.02it/s]
|
1368 |
78%|ββββββββ | 8602/11074 [1:13:20<20:22, 2.02it/s]
|
1369 |
78%|ββββββββ | 8603/11074 [1:13:21<20:21, 2.02it/s]
|
1370 |
78%|ββββββββ | 8604/11074 [1:13:21<20:20, 2.02it/s]
|
1371 |
78%|ββββββββ | 8605/11074 [1:13:22<20:21, 2.02it/s]
|
1372 |
78%|ββββββββ | 8606/11074 [1:13:22<20:20, 2.02it/s]
|
1373 |
78%|ββββββββ | 8607/11074 [1:13:23<20:19, 2.02it/s]
|
1374 |
78%|ββββββββ | 8608/11074 [1:13:23<20:17, 2.02it/s]
|
1375 |
78%|ββββββββ | 8609/11074 [1:13:24<20:18, 2.02it/s]
|
1376 |
78%|ββββββββ | 8610/11074 [1:13:24<20:17, 2.02it/s]
|
1377 |
78%|ββββββββ | 8611/11074 [1:13:25<20:18, 2.02it/s]
|
1378 |
78%|ββββββββ | 8612/11074 [1:13:25<20:16, 2.02it/s]
|
1379 |
78%|ββββββββ | 8613/11074 [1:13:26<20:16, 2.02it/s]
|
1380 |
78%|ββββββββ | 8614/11074 [1:13:26<20:14, 2.02it/s]
|
1381 |
78%|ββββββββ | 8615/11074 [1:13:27<20:15, 2.02it/s]
|
1382 |
78%|ββββββββ | 8616/11074 [1:13:27<20:14, 2.02it/s]
|
1383 |
78%|ββββββββ | 8617/11074 [1:13:28<20:14, 2.02it/s]
|
1384 |
78%|ββββββββ | 8618/11074 [1:13:28<20:13, 2.02it/s]
|
1385 |
78%|ββββββββ | 8619/11074 [1:13:29<20:11, 2.03it/s]
|
1386 |
78%|ββββββββ | 8620/11074 [1:13:29<20:12, 2.02it/s]
|
1387 |
78%|ββββββββ | 8621/11074 [1:13:30<20:10, 2.03it/s]
|
1388 |
78%|ββββββββ | 8622/11074 [1:13:30<20:11, 2.02it/s]
|
1389 |
78%|ββββββββ | 8623/11074 [1:13:31<20:09, 2.03it/s]
|
1390 |
78%|ββββββββ | 8624/11074 [1:13:31<20:09, 2.03it/s]
|
1391 |
78%|ββββββββ | 8625/11074 [1:13:32<20:08, 2.03it/s]{'loss': 3.2424, 'grad_norm': 0.24160033464431763, 'learning_rate': 0.00014174181216083863, 'epoch': 10.9}
|
|
|
1392 |
|
1393 |
78%|ββββββββ | 8625/11074 [1:13:32<20:08, 2.03it/s]
|
1394 |
78%|ββββββββ | 8626/11074 [1:13:32<20:14, 2.02it/s]
|
1395 |
78%|ββββββββ | 8627/11074 [1:13:33<20:11, 2.02it/s]
|
1396 |
78%|ββββββββ | 8628/11074 [1:13:33<20:10, 2.02it/s]
|
1397 |
78%|ββββββββ | 8629/11074 [1:13:34<20:08, 2.02it/s]
|
1398 |
78%|ββββββββ | 8630/11074 [1:13:34<20:09, 2.02it/s]
|
1399 |
78%|ββββββββ | 8631/11074 [1:13:35<20:06, 2.02it/s]
|
1400 |
78%|ββββββββ | 8632/11074 [1:13:35<20:08, 2.02it/s]
|
1401 |
78%|ββββββββ | 8633/11074 [1:13:36<20:06, 2.02it/s]
|
1402 |
78%|ββββββββ | 8634/11074 [1:13:36<20:05, 2.02it/s]
|
1403 |
78%|ββββββββ | 8635/11074 [1:13:37<20:04, 2.02it/s]
|
1404 |
78%|ββββββββ | 8636/11074 [1:13:37<20:04, 2.02it/s]
|
1405 |
78%|ββββββββ | 8637/11074 [1:13:38<20:02, 2.03it/s]
|
1406 |
78%|ββββββββ | 8638/11074 [1:13:38<20:02, 2.03it/s]
|
1407 |
78%|ββββββββ | 8639/11074 [1:13:39<20:03, 2.02it/s]
|
1408 |
78%|ββββββββ | 8640/11074 [1:13:39<20:03, 2.02it/s]
|
1409 |
78%|ββββββββ | 8641/11074 [1:13:40<20:02, 2.02it/s]
|
1410 |
78%|ββββββββ | 8642/11074 [1:13:40<20:03, 2.02it/s]
|
1411 |
78%|ββββββββ | 8643/11074 [1:13:41<20:02, 2.02it/s]
|
1412 |
78%|ββββββββ | 8644/11074 [1:13:41<20:02, 2.02it/s]
|
1413 |
78%|ββββββββ | 8645/11074 [1:13:42<20:00, 2.02it/s]
|
1414 |
78%|ββββββββ | 8646/11074 [1:13:42<20:01, 2.02it/s]
|
1415 |
78%|ββββββββ | 8647/11074 [1:13:43<20:00, 2.02it/s]
|
1416 |
78%|ββββββββ | 8648/11074 [1:13:43<20:00, 2.02it/s]
|
1417 |
78%|ββββββββ | 8649/11074 [1:13:44<19:59, 2.02it/s]
|
1418 |
78%|ββββββββ | 8650/11074 [1:13:44<19:59, 2.02it/s]
|
1419 |
{'loss': 3.2466, 'grad_norm': 0.23945370316505432, 'learning_rate': 0.00013900426803896234, 'epoch': 10.93}
|
|
|
1420 |
78%|ββββββββ | 8650/11074 [1:13:44<19:59, 2.02it/s]
|
1421 |
78%|ββββββββ | 8651/11074 [1:13:45<19:59, 2.02it/s]
|
1422 |
78%|ββββββββ | 8652/11074 [1:13:45<19:59, 2.02it/s]
|
1423 |
78%|ββββββββ | 8653/11074 [1:13:46<19:57, 2.02it/s]
|
1424 |
78%|ββββββββ | 8654/11074 [1:13:46<19:56, 2.02it/s]
|
1425 |
78%|ββββββββ | 8655/11074 [1:13:47<19:55, 2.02it/s]
|
1426 |
78%|ββββββββ | 8656/11074 [1:13:47<19:56, 2.02it/s]
|
1427 |
78%|ββββββββ | 8657/11074 [1:13:48<19:55, 2.02it/s]
|
1428 |
78%|ββββββββ | 8658/11074 [1:13:48<19:56, 2.02it/s]
|
1429 |
78%|ββββββββ | 8659/11074 [1:13:49<19:54, 2.02it/s]
|
1430 |
78%|ββββββββ | 8660/11074 [1:13:49<19:55, 2.02it/s]
|
1431 |
78%|ββββββββ | 8661/11074 [1:13:49<19:53, 2.02it/s]
|
1432 |
78%|ββββββββ | 8662/11074 [1:13:50<19:52, 2.02it/s]
|
1433 |
78%|ββββββββ | 8663/11074 [1:13:50<19:52, 2.02it/s]
|
1434 |
78%|ββββββββ | 8664/11074 [1:13:51<19:50, 2.02it/s]
|
1435 |
78%|ββββββββ | 8665/11074 [1:13:51<19:51, 2.02it/s]
|
1436 |
78%|ββββββββ | 8666/11074 [1:13:52<19:51, 2.02it/s]
|
1437 |
78%|ββββββββ | 8667/11074 [1:13:52<19:50, 2.02it/s]
|
1438 |
78%|ββββββββ | 8668/11074 [1:13:53<19:48, 2.02it/s]
|
1439 |
78%|ββββββββ | 8669/11074 [1:13:53<19:49, 2.02it/s]
|
1440 |
78%|ββββββββ | 8670/11074 [1:13:54<19:48, 2.02it/s]
|
1441 |
78%|ββββββββ | 8671/11074 [1:13:54<19:48, 2.02it/s]
|
1442 |
78%|ββββββββ | 8672/11074 [1:13:55<19:48, 2.02it/s]
|
1443 |
78%|ββββββββ | 8673/11074 [1:13:55<19:47, 2.02it/s]
|
1444 |
78%|ββββββββ | 8674/11074 [1:13:56<19:47, 2.02it/s]
|
1445 |
78%|ββββββββ | 8675/11074 [1:13:56<19:46, 2.02it/s]
|
1446 |
|
|
|
1447 |
78%|ββββββββ | 8675/11074 [1:13:56<19:46, 2.02it/s]
|
1448 |
78%|ββββββββ | 8676/11074 [1:13:57<19:46, 2.02it/s]
|
1449 |
78%|ββββββββ | 8677/11074 [1:13:57<19:45, 2.02it/s]
|
1450 |
78%|ββββββββ | 8678/11074 [1:13:58<19:45, 2.02it/s]
|
1451 |
78%|ββββββββ | 8679/11074 [1:13:58<19:43, 2.02it/s]
|
1452 |
78%|ββββββββ | 8680/11074 [1:13:59<19:44, 2.02it/s]
|
1453 |
78%|ββββββββ | 8681/11074 [1:13:59<19:43, 2.02it/s]
|
1454 |
78%|ββββββββ | 8682/11074 [1:14:00<19:42, 2.02it/s]
|
1455 |
78%|ββββββββ | 8683/11074 [1:14:00<19:42, 2.02it/s]
|
1456 |
78%|ββββββββ | 8684/11074 [1:14:01<19:41, 2.02it/s]
|
1457 |
78%|ββββββββ | 8685/11074 [1:14:01<19:41, 2.02it/s]
|
1458 |
78%|ββββββββ | 8686/11074 [1:14:02<19:42, 2.02it/s]
|
1459 |
78%|ββββββββ | 8687/11074 [1:14:02<19:41, 2.02it/s]
|
1460 |
78%|ββββββββ | 8688/11074 [1:14:03<19:40, 2.02it/s]
|
1461 |
78%|ββββββββ | 8689/11074 [1:14:03<19:39, 2.02it/s]
|
1462 |
78%|ββββββββ | 8690/11074 [1:14:04<19:40, 2.02it/s]
|
1463 |
78%|ββββββββ | 8691/11074 [1:14:04<19:40, 2.02it/s]
|
1464 |
78%|ββββββββ | 8692/11074 [1:14:05<19:38, 2.02it/s]
|
1465 |
78%|ββββββββ | 8693/11074 [1:14:05<19:37, 2.02it/s]
|
1466 |
79%|ββββββββ | 8694/11074 [1:14:06<19:36, 2.02it/s]
|
1467 |
79%|ββββββββ | 8695/11074 [1:14:06<19:36, 2.02it/s]
|
1468 |
79%|ββββββββ | 8696/11074 [1:14:07<19:35, 2.02it/s]
|
1469 |
79%|ββββββββ | 8697/11074 [1:14:07<19:35, 2.02it/s]
|
1470 |
79%|ββββββββ | 8698/11074 [1:14:08<19:35, 2.02it/s]
|
1471 |
79%|ββββββββ | 8699/11074 [1:14:08<19:34, 2.02it/s]
|
1472 |
79%|ββββββββ | 8700/11074 [1:14:09<19:33, 2.02it/s]{'loss': 3.2503, 'grad_norm': 0.24186570942401886, 'learning_rate': 0.000133596608763568, 'epoch': 10.99}
|
|
|
1473 |
|
1474 |
79%|ββββββββ | 8700/11074 [1:14:09<19:33, 2.02it/s]
|
1475 |
79%|ββββββββ | 8701/11074 [1:14:09<19:34, 2.02it/s]
|
1476 |
79%|ββββββββ | 8702/11074 [1:14:10<19:34, 2.02it/s]
|
1477 |
79%|ββββββββ | 8703/11074 [1:14:10<19:33, 2.02it/s]
|
1478 |
79%|ββββββββ | 8704/11074 [1:14:11<19:33, 2.02it/s]
|
1479 |
79%|ββββββββ | 8705/11074 [1:14:11<19:31, 2.02it/s]
|
1480 |
79%|ββββββββ | 8706/11074 [1:14:12<20:00, 1.97it/s]
|
1481 |
79%|ββββββββ | 8707/11074 [1:14:24<2:38:17, 4.01s/it]
|
1482 |
79%|ββββββββ | 8708/11074 [1:14:24<1:56:41, 2.96s/it]
|
1483 |
79%|ββββββββ | 8709/11074 [1:14:25<1:27:30, 2.22s/it]
|
1484 |
79%|ββββββββ | 8710/11074 [1:14:25<1:07:12, 1.71s/it]
|
1485 |
79%|ββββββββ | 8711/11074 [1:14:26<52:50, 1.34s/it]
|
1486 |
79%|ββββββββ | 8712/11074 [1:14:26<42:50, 1.09s/it]
|
1487 |
79%|ββββββββ | 8713/11074 [1:14:27<35:46, 1.10it/s]
|
1488 |
79%|ββββββββ | 8714/11074 [1:14:27<30:52, 1.27it/s]
|
1489 |
79%|ββββββββ | 8715/11074 [1:14:28<27:25, 1.43it/s]
|
1490 |
79%|ββββββββ | 8716/11074 [1:14:28<25:00, 1.57it/s]
|
1491 |
79%|ββββββββ | 8717/11074 [1:14:29<23:20, 1.68it/s]
|
1492 |
79%|ββββββββ | 8718/11074 [1:14:29<22:07, 1.77it/s]
|
1493 |
79%|ββββββββ | 8719/11074 [1:14:30<21:26, 1.83it/s]
|
1494 |
79%|ββββββββ | 8720/11074 [1:14:30<20:52, 1.88it/s]
|
1495 |
79%|ββββββββ | 8721/11074 [1:14:31<20:27, 1.92it/s]
|
1496 |
79%|ββββββββ | 8722/11074 [1:14:31<20:06, 1.95it/s]
|
1497 |
79%|ββββββββ | 8723/11074 [1:14:32<19:52, 1.97it/s]
|
1498 |
79%|ββββββββ | 8724/11074 [1:14:32<19:42, 1.99it/s]
|
1499 |
79%|ββββββββ | 8725/11074 [1:14:33<19:37, 2.00it/s]
|
1500 |
{'loss': 3.1825, 'grad_norm': 0.2393781691789627, 'learning_rate': 0.0001309268294598309, 'epoch': 11.02}
|
|
|
1501 |
79%|ββββββββ | 8725/11074 [1:14:33<19:37, 2.00it/s]
|
1502 |
79%|ββββββββ | 8726/11074 [1:14:33<19:32, 2.00it/s]
|
1503 |
79%|ββββββββ | 8727/11074 [1:14:34<19:28, 2.01it/s]
|
1504 |
79%|ββββββββ | 8728/11074 [1:14:34<19:27, 2.01it/s]
|
1505 |
79%|ββββββββ | 8729/11074 [1:14:35<19:24, 2.01it/s]
|
1506 |
79%|ββββββββ | 8730/11074 [1:14:35<19:22, 2.02it/s]
|
1507 |
79%|ββββββββ | 8731/11074 [1:14:36<19:23, 2.01it/s]
|
1508 |
79%|ββββββββ | 8732/11074 [1:14:36<19:22, 2.01it/s]
|
1509 |
79%|ββββββββ | 8733/11074 [1:14:37<19:21, 2.01it/s]
|
1510 |
79%|ββββββββ | 8734/11074 [1:14:37<19:20, 2.02it/s]
|
1511 |
79%|ββββββββ | 8735/11074 [1:14:38<19:17, 2.02it/s]
|
1512 |
79%|ββββββββ | 8736/11074 [1:14:38<19:15, 2.02it/s]
|
1513 |
79%|ββββββββ | 8737/11074 [1:14:39<19:15, 2.02it/s]
|
|
|
536 |
|
537 |
71%|βββββββ | 7850/11074 [1:06:57<26:33, 2.02it/s]
|
538 |
71%|βββββββ | 7851/11074 [1:06:58<26:34, 2.02it/s]
|
539 |
71%|βββββββ | 7852/11074 [1:06:58<26:34, 2.02it/s]
|
540 |
71%|βββββββ | 7853/11074 [1:06:59<26:32, 2.02it/s]
|
541 |
71%|βββββββ | 7854/11074 [1:06:59<26:32, 2.02it/s]
|
542 |
71%|βββββββ | 7855/11074 [1:07:00<26:30, 2.02it/s]
|
543 |
71%|βββββββ | 7856/11074 [1:07:00<26:30, 2.02it/s]
|
544 |
71%|βββββββ | 7857/11074 [1:07:01<26:28, 2.02it/s]
|
545 |
71%|βββββββ | 7858/11074 [1:07:01<26:29, 2.02it/s]
|
546 |
71%|βββββββ | 7859/11074 [1:07:02<26:27, 2.02it/s]
|
547 |
71%|βββββββ | 7860/11074 [1:07:02<26:27, 2.02it/s]
|
548 |
71%|βββββββ | 7861/11074 [1:07:03<26:27, 2.02it/s]
|
549 |
71%|βββββββ | 7862/11074 [1:07:03<26:26, 2.02it/s]
|
550 |
71%|βββββββ | 7863/11074 [1:07:04<26:26, 2.02it/s]
|
551 |
71%|βββββββ | 7864/11074 [1:07:04<26:24, 2.03it/s]
|
552 |
71%|βββββββ | 7865/11074 [1:07:05<26:23, 2.03it/s]
|
553 |
71%|βββββββ | 7866/11074 [1:07:05<26:23, 2.03it/s]
|
554 |
71%|βββββββ | 7867/11074 [1:07:06<26:23, 2.02it/s]
|
555 |
71%|βββββββ | 7868/11074 [1:07:06<26:22, 2.03it/s]
|
556 |
71%|βββββββ | 7869/11074 [1:07:07<26:21, 2.03it/s]
|
557 |
71%|βββββββ | 7870/11074 [1:07:07<26:21, 2.03it/s]
|
558 |
71%|βββββββ | 7871/11074 [1:07:07<26:20, 2.03it/s]
|
559 |
71%|βββββββ | 7872/11074 [1:07:08<26:20, 2.03it/s]
|
560 |
71%|βββββββ | 7873/11074 [1:07:08<26:20, 2.03it/s]
|
561 |
71%|βββββββ | 7874/11074 [1:07:09<26:19, 2.03it/s]
|
562 |
71%|βββββββ | 7875/11074 [1:07:09<26:19, 2.03it/s]
|
563 |
{'loss': 3.3077, 'grad_norm': 0.23198536038398743, 'learning_rate': 0.0002334027221578824, 'epoch': 9.95}
|
564 |
|
565 |
71%|βββββββ | 7875/11074 [1:07:09<26:19, 2.03it/s]
|
566 |
71%|βββββββ | 7876/11074 [1:07:10<26:22, 2.02it/s]
|
567 |
71%|βββββββ | 7877/11074 [1:07:10<26:21, 2.02it/s]
|
568 |
71%|βββββββ | 7878/11074 [1:07:11<26:21, 2.02it/s]
|
569 |
71%|βββββββ | 7879/11074 [1:07:11<26:20, 2.02it/s]
|
570 |
71%|βββββββ | 7880/11074 [1:07:12<26:18, 2.02it/s]
|
571 |
71%|βββββββ | 7881/11074 [1:07:12<26:18, 2.02it/s]
|
572 |
71%|βββββββ | 7882/11074 [1:07:13<26:18, 2.02it/s]
|
573 |
71%|βββββββ | 7883/11074 [1:07:13<26:15, 2.02it/s]
|
574 |
71%|βββββββ | 7884/11074 [1:07:14<26:15, 2.02it/s]
|
575 |
71%|βββββββ | 7885/11074 [1:07:14<26:13, 2.03it/s]
|
576 |
71%|βββββββ | 7886/11074 [1:07:15<26:13, 2.03it/s]
|
577 |
71%|βββββββ | 7887/11074 [1:07:15<26:13, 2.03it/s]
|
578 |
71%|βββββββ | 7888/11074 [1:07:16<26:11, 2.03it/s]
|
579 |
71%|βββββββ | 7889/11074 [1:07:16<26:12, 2.02it/s]
|
580 |
71%|βββββββ | 7890/11074 [1:07:17<26:10, 2.03it/s]
|
581 |
71%|ββββββββ | 7891/11074 [1:07:17<26:11, 2.02it/s]
|
582 |
71%|ββββββββ | 7892/11074 [1:07:18<26:08, 2.03it/s]
|
583 |
71%|ββββββββ | 7893/11074 [1:07:18<26:10, 2.03it/s]
|
584 |
71%|ββββββββ | 7894/11074 [1:07:19<26:10, 2.03it/s]
|
585 |
71%|ββββββββ | 7895/11074 [1:07:19<26:09, 2.03it/s]
|
586 |
71%|ββββββββ | 7896/11074 [1:07:20<26:09, 2.03it/s]
|
587 |
71%|ββββββββ | 7897/11074 [1:07:20<26:09, 2.02it/s]
|
588 |
71%|ββββββββ | 7898/11074 [1:07:21<26:08, 2.03it/s]
|
589 |
71%|ββββββββ | 7899/11074 [1:07:21<26:06, 2.03it/s]
|
590 |
71%|ββββββββ | 7900/11074 [1:07:22<26:06, 2.03it/s]{'loss': 3.3093, 'grad_norm': 0.23056922852993011, 'learning_rate': 0.00023007749594435663, 'epoch': 9.98}
|
591 |
|
592 |
|
593 |
71%|ββββββββ | 7900/11074 [1:07:22<26:06, 2.03it/s]
|
594 |
71%|ββββββββ | 7901/11074 [1:07:22<26:09, 2.02it/s]
|
595 |
71%|ββββββββ | 7902/11074 [1:07:23<26:09, 2.02it/s]
|
596 |
71%|ββββββββ | 7903/11074 [1:07:23<26:07, 2.02it/s]
|
597 |
71%|ββββββββ | 7904/11074 [1:07:24<26:06, 2.02it/s]
|
598 |
71%|ββββββββ | 7905/11074 [1:07:24<26:05, 2.02it/s]
|
599 |
71%|ββββββββ | 7906/11074 [1:07:25<26:03, 2.03it/s]
|
600 |
71%|ββββββββ | 7907/11074 [1:07:25<26:03, 2.03it/s]
|
601 |
71%|ββββββββ | 7908/11074 [1:07:26<26:01, 2.03it/s]
|
602 |
71%|ββββββββ | 7909/11074 [1:07:26<26:01, 2.03it/s]
|
603 |
71%|ββββββββ | 7910/11074 [1:07:27<26:01, 2.03it/s]
|
604 |
71%|ββββββββ | 7911/11074 [1:07:27<25:59, 2.03it/s]
|
605 |
71%|ββββββββ | 7912/11074 [1:07:28<25:59, 2.03it/s]
|
606 |
71%|ββββββββ | 7913/11074 [1:07:28<25:58, 2.03it/s]
|
607 |
71%|ββββββββ | 7914/11074 [1:07:29<26:00, 2.03it/s]
|
608 |
71%|ββββββββ | 7915/11074 [1:07:29<25:45, 2.04it/s]
|
609 |
71%|βοΏ½οΏ½οΏ½ββββββ | 7916/11074 [1:07:41<3:26:15, 3.92s/it]
|
610 |
71%|ββββββββ | 7917/11074 [1:07:42<2:32:17, 2.89s/it]
|
611 |
72%|ββββββββ | 7918/11074 [1:07:42<1:54:19, 2.17s/it]
|
612 |
72%|ββββββββ | 7919/11074 [1:07:43<1:27:47, 1.67s/it]
|
613 |
72%|ββββββββ | 7920/11074 [1:07:43<1:09:13, 1.32s/it]
|
614 |
72%|ββββββββ | 7921/11074 [1:07:44<56:14, 1.07s/it]
|
615 |
72%|ββββββββ | 7922/11074 [1:07:44<47:07, 1.11it/s]
|
616 |
72%|ββββββββ | 7923/11074 [1:07:45<40:57, 1.28it/s]
|
617 |
72%|ββββββββ | 7924/11074 [1:07:45<36:26, 1.44it/s]
|
618 |
72%|ββββββββ | 7925/11074 [1:07:46<33:16, 1.58it/s]
|
619 |
|
|
|
620 |
72%|ββββββββ | 7925/11074 [1:07:46<33:16, 1.58it/s]
|
621 |
72%|ββββββββ | 7926/11074 [1:07:46<31:04, 1.69it/s]
|
622 |
72%|ββββββββ | 7927/11074 [1:07:47<29:32, 1.78it/s]
|
623 |
72%|ββββββββ | 7928/11074 [1:07:47<28:29, 1.84it/s]
|
624 |
72%|ββββββββ | 7929/11074 [1:07:48<27:42, 1.89it/s]
|
625 |
72%|ββββββββ | 7930/11074 [1:07:48<27:10, 1.93it/s]
|
626 |
72%|ββββββββ | 7931/11074 [1:07:49<26:47, 1.96it/s]
|
627 |
72%|ββββββββ | 7932/11074 [1:07:49<26:30, 1.98it/s]
|
628 |
72%|ββββββββ | 7933/11074 [1:07:50<26:19, 1.99it/s]
|
629 |
72%|ββββββββ | 7934/11074 [1:07:50<26:10, 2.00it/s]
|
630 |
72%|ββββββββ | 7935/11074 [1:07:51<26:03, 2.01it/s]
|
631 |
72%|ββββββββ | 7936/11074 [1:07:51<25:59, 2.01it/s]
|
632 |
72%|ββββββββ | 7937/11074 [1:07:52<25:55, 2.02it/s]
|
633 |
72%|ββββββββ | 7938/11074 [1:07:52<25:52, 2.02it/s]
|
634 |
72%|ββββββββ | 7939/11074 [1:07:53<25:50, 2.02it/s]
|
635 |
72%|ββββββββ | 7940/11074 [1:07:53<25:54, 2.02it/s]
|
636 |
+
|
637 |
72%|ββββββββ | 7925/11074 [1:07:46<33:16, 1.58it/s]
|
638 |
72%|ββββββββ | 7926/11074 [1:07:46<31:04, 1.69it/s]
|
639 |
72%|ββββββββ | 7927/11074 [1:07:47<29:32, 1.78it/s]
|
640 |
72%|ββββββββ | 7928/11074 [1:07:47<28:29, 1.84it/s]
|
641 |
72%|ββββββββ | 7929/11074 [1:07:48<27:42, 1.89it/s]
|
642 |
72%|ββββββββ | 7930/11074 [1:07:48<27:10, 1.93it/s]
|
643 |
72%|ββββββββ | 7931/11074 [1:07:49<26:47, 1.96it/s]
|
644 |
72%|ββββββββ | 7932/11074 [1:07:49<26:30, 1.98it/s]
|
645 |
72%|ββββββββ | 7933/11074 [1:07:50<26:19, 1.99it/s]
|
646 |
72%|ββββββββ | 7934/11074 [1:07:50<26:10, 2.00it/s]
|
647 |
72%|ββββββββ | 7935/11074 [1:07:51<26:03, 2.01it/s]
|
648 |
72%|ββββββββ | 7936/11074 [1:07:51<25:59, 2.01it/s]
|
649 |
72%|ββββββββ | 7937/11074 [1:07:52<25:55, 2.02it/s]
|
650 |
72%|ββββββββ | 7938/11074 [1:07:52<25:52, 2.02it/s]
|
651 |
72%|ββββββββ | 7939/11074 [1:07:53<25:50, 2.02it/s]
|
652 |
72%|ββββββββ | 7940/11074 [1:07:53<25:54, 2.02it/s]
|
653 |
72%|ββββββββ | 7941/11074 [1:07:54<25:57, 2.01it/s]
|
654 |
72%|ββββββββ | 7942/11074 [1:07:54<25:51, 2.02it/s]
|
655 |
72%|ββββββββ | 7943/11074 [1:07:55<25:51, 2.02it/s]
|
656 |
72%|ββββββββ | 7944/11074 [1:07:55<25:47, 2.02it/s]
|
657 |
72%|ββββββββ | 7945/11074 [1:07:55<25:47, 2.02it/s]
|
658 |
72%|ββββββββ | 7946/11074 [1:07:56<25:44, 2.03it/s]
|
659 |
72%|ββββββββ | 7947/11074 [1:07:56<25:43, 2.03it/s]
|
660 |
72%|ββββββββ | 7948/11074 [1:07:57<25:43, 2.02it/s]
|
661 |
72%|ββββββββ | 7949/11074 [1:07:57<25:41, 2.03it/s]
|
662 |
72%|ββββββββ | 7950/11074 [1:07:58<25:42, 2.03it/s]{'loss': 3.2027, 'grad_norm': 0.23849323391914368, 'learning_rate': 0.00022347754066845987, 'epoch': 10.04}
|
663 |
|
664 |
+
|
665 |
72%|ββββββββ | 7950/11074 [1:07:58<25:42, 2.03it/s]
|
666 |
72%|ββββββββ | 7951/11074 [1:07:58<25:44, 2.02it/s]
|
667 |
72%|ββββββββ | 7952/11074 [1:07:59<25:42, 2.02it/s]
|
668 |
72%|ββββββββ | 7953/11074 [1:07:59<25:40, 2.03it/s]
|
669 |
72%|ββββββββ | 7954/11074 [1:08:00<25:40, 2.03it/s]
|
670 |
72%|ββββββββ | 7955/11074 [1:08:00<25:38, 2.03it/s]
|
671 |
72%|ββββββββ | 7956/11074 [1:08:01<25:38, 2.03it/s]
|
672 |
72%|ββββββββ | 7957/11074 [1:08:01<25:38, 2.03it/s]
|
673 |
72%|ββββββββ | 7958/11074 [1:08:02<25:38, 2.02it/s]
|
674 |
72%|ββββββββ | 7959/11074 [1:08:02<25:39, 2.02it/s]
|
675 |
72%|ββββββββ | 7960/11074 [1:08:03<25:36, 2.03it/s]
|
676 |
72%|ββββββββ | 7961/11074 [1:08:03<25:36, 2.03it/s]
|
677 |
72%|ββββββββ | 7962/11074 [1:08:04<25:35, 2.03it/s]
|
678 |
72%|ββββββββ | 7963/11074 [1:08:04<25:35, 2.03it/s]
|
679 |
72%|ββββββββ | 7964/11074 [1:08:05<25:34, 2.03it/s]
|
680 |
72%|ββββββββ | 7965/11074 [1:08:05<25:34, 2.03it/s]
|
681 |
72%|ββββββββ | 7966/11074 [1:08:06<25:35, 2.02it/s]
|
682 |
72%|ββββββββ | 7967/11074 [1:08:06<25:35, 2.02it/s]
|
683 |
72%|ββββββββ | 7968/11074 [1:08:07<25:35, 2.02it/s]
|
684 |
72%|ββββββββ | 7969/11074 [1:08:07<25:34, 2.02it/s]
|
685 |
72%|ββββββββ | 7970/11074 [1:08:08<25:33, 2.02it/s]
|
686 |
72%|ββββββββ | 7971/11074 [1:08:08<25:33, 2.02it/s]
|
687 |
72%|ββββββββ | 7972/11074 [1:08:09<25:32, 2.02it/s]
|
688 |
72%|ββββββββ | 7973/11074 [1:08:09<25:30, 2.03it/s]
|
689 |
72%|ββββββββ | 7974/11074 [1:08:10<25:30, 2.03it/s]
|
690 |
72%|ββββββββ | 7975/11074 [1:08:10<25:29, 2.03it/s]{'loss': 3.1925, 'grad_norm': 0.2428920567035675, 'learning_rate': 0.00022020322150497878, 'epoch': 10.08}
|
691 |
|
692 |
+
|
693 |
72%|ββββββββ | 7975/11074 [1:08:10<25:29, 2.03it/s]
|
694 |
72%|ββββββββ | 7976/11074 [1:08:11<25:30, 2.02it/s]
|
695 |
72%|ββββββββ | 7977/11074 [1:08:11<25:29, 2.02it/s]
|
696 |
72%|ββββββββ | 7978/11074 [1:08:12<25:30, 2.02it/s]
|
697 |
72%|ββββββββ | 7979/11074 [1:08:12<25:29, 2.02it/s]
|
698 |
72%|ββββββββ | 7980/11074 [1:08:13<25:29, 2.02it/s]
|
699 |
72%|ββββββββ | 7981/11074 [1:08:13<25:28, 2.02it/s]
|
700 |
72%|ββββββββ | 7982/11074 [1:08:14<25:26, 2.03it/s]
|
701 |
72%|ββββββββ | 7983/11074 [1:08:14<25:27, 2.02it/s]
|
702 |
72%|ββββββββ | 7984/11074 [1:08:15<25:24, 2.03it/s]
|
703 |
72%|ββββββββ | 7985/11074 [1:08:15<25:25, 2.03it/s]
|
704 |
72%|ββββββββ | 7986/11074 [1:08:16<25:23, 2.03it/s]
|
705 |
72%|ββββββββ | 7987/11074 [1:08:16<25:24, 2.03it/s]
|
706 |
72%|ββββββββ | 7988/11074 [1:08:17<25:23, 2.03it/s]
|
707 |
72%|ββββββββ | 7989/11074 [1:08:17<25:20, 2.03it/s]
|
708 |
72%|ββββββββ | 7990/11074 [1:08:18<25:22, 2.03it/s]
|
709 |
72%|ββββββββ | 7991/11074 [1:08:18<25:21, 2.03it/s]
|
710 |
72%|ββββββββ | 7992/11074 [1:08:19<25:22, 2.02it/s]
|
711 |
72%|ββββββββ | 7993/11074 [1:08:19<25:20, 2.03it/s]
|
712 |
72%|ββββββββ | 7994/11074 [1:08:20<25:21, 2.02it/s]
|
713 |
72%|ββββββββ | 7995/11074 [1:08:20<25:20, 2.03it/s]
|
714 |
72%|ββββββββ | 7996/11074 [1:08:21<25:19, 2.03it/s]
|
715 |
72%|ββββββββ | 7997/11074 [1:08:21<25:19, 2.02it/s]
|
716 |
72%|ββββββββ | 7998/11074 [1:08:22<25:17, 2.03it/s]
|
717 |
72%|ββββββββ | 7999/11074 [1:08:22<25:18, 2.03it/s]
|
718 |
72%|ββββββββ | 8000/11074 [1:08:23<25:15, 2.03it/s]
|
719 |
{'loss': 3.2116, 'grad_norm': 0.23470519483089447, 'learning_rate': 0.00021694627948786466, 'epoch': 10.11}
|
720 |
+
|
721 |
72%|ββββββββ | 8000/11074 [1:08:23<25:15, 2.03it/s]
|
722 |
72%|ββββββββ | 8001/11074 [1:08:23<25:18, 2.02it/s]
|
723 |
72%|ββββββββ | 8002/11074 [1:08:24<25:17, 2.02it/s]
|
724 |
72%|ββββββββ | 8003/11074 [1:08:24<25:17, 2.02it/s]
|
725 |
72%|ββββββββ | 8004/11074 [1:08:25<25:16, 2.02it/s]
|
726 |
72%|ββββββββ | 8005/11074 [1:08:25<25:16, 2.02it/s]
|
727 |
72%|ββββββββ | 8006/11074 [1:08:26<25:16, 2.02it/s]
|
728 |
72%|ββββββββ | 8007/11074 [1:08:26<25:15, 2.02it/s]
|
729 |
72%|ββββββββ | 8008/11074 [1:08:27<25:14, 2.03it/s]
|
730 |
72%|ββββββββ | 8009/11074 [1:08:27<25:12, 2.03it/s]
|
731 |
72%|ββββββββ | 8010/11074 [1:08:28<25:12, 2.03it/s]
|
732 |
72%|ββββββββ | 8011/11074 [1:08:28<25:11, 2.03it/s]
|
733 |
72%|ββββββββ | 8012/11074 [1:08:29<25:11, 2.03it/s]
|
734 |
72%|ββββββββ | 8013/11074 [1:08:29<25:11, 2.02it/s]
|
735 |
72%|ββββββββ | 8014/11074 [1:08:30<25:11, 2.02it/s]
|
736 |
72%|ββββββββ | 8015/11074 [1:08:30<25:10, 2.02it/s]
|
737 |
72%|ββββββββ | 8016/11074 [1:08:31<25:10, 2.02it/s]
|
738 |
72%|ββββββββ | 8017/11074 [1:08:31<25:10, 2.02it/s]
|
739 |
72%|ββββββββ | 8018/11074 [1:08:32<25:08, 2.03it/s]
|
740 |
72%|ββββββββ | 8019/11074 [1:08:32<25:10, 2.02it/s]
|
741 |
72%|ββββββββ | 8020/11074 [1:08:33<25:07, 2.03it/s]
|
742 |
72%|ββββββββ | 8021/11074 [1:08:33<25:08, 2.02it/s]
|
743 |
72%|ββββββββ | 8022/11074 [1:08:34<25:06, 2.03it/s]
|
744 |
72%|ββββββββ | 8023/11074 [1:08:34<25:07, 2.02it/s]
|
745 |
72%|ββββββββ | 8024/11074 [1:08:34<25:07, 2.02it/s]
|
746 |
72%|ββββββββ | 8025/11074 [1:08:35<25:07, 2.02it/s]
|
747 |
{'loss': 3.2031, 'grad_norm': 0.24160942435264587, 'learning_rate': 0.00021370691689377887, 'epoch': 10.14}
|
748 |
+
|
749 |
72%|ββββββββ | 8025/11074 [1:08:35<25:07, 2.02it/s]
|
750 |
72%|ββββββββ | 8026/11074 [1:08:35<25:08, 2.02it/s]
|
751 |
72%|ββοΏ½οΏ½οΏ½βββββ | 8027/11074 [1:08:36<25:06, 2.02it/s]
|
752 |
72%|ββββββββ | 8028/11074 [1:08:36<25:05, 2.02it/s]
|
753 |
73%|ββββββββ | 8029/11074 [1:08:37<25:04, 2.02it/s]
|
754 |
73%|ββββββββ | 8030/11074 [1:08:37<25:03, 2.02it/s]
|
755 |
73%|ββββββββ | 8031/11074 [1:08:38<25:02, 2.03it/s]
|
756 |
73%|ββββββββ | 8032/11074 [1:08:38<25:02, 2.02it/s]
|
757 |
73%|ββββββββ | 8033/11074 [1:08:39<25:03, 2.02it/s]
|
758 |
73%|ββββββββ | 8034/11074 [1:08:39<25:02, 2.02it/s]
|
759 |
73%|ββββββββ | 8035/11074 [1:08:40<25:01, 2.02it/s]
|
760 |
73%|ββββββββ | 8036/11074 [1:08:40<25:01, 2.02it/s]
|
761 |
73%|ββββββββ | 8037/11074 [1:08:41<24:59, 2.03it/s]
|
762 |
73%|ββββββββ | 8038/11074 [1:08:41<24:59, 2.02it/s]
|
763 |
73%|ββββββββ | 8039/11074 [1:08:42<24:58, 2.03it/s]
|
764 |
73%|ββββββββ | 8040/11074 [1:08:42<24:58, 2.02it/s]
|
765 |
73%|ββββββββ | 8041/11074 [1:08:43<24:58, 2.02it/s]
|
766 |
73%|ββββββββ | 8042/11074 [1:08:43<24:57, 2.03it/s]
|
767 |
73%|ββββββββ | 8043/11074 [1:08:44<25:14, 2.00it/s]
|
768 |
73%|ββββββββ | 8044/11074 [1:08:44<25:08, 2.01it/s]
|
769 |
73%|ββββββββ | 8045/11074 [1:08:45<25:03, 2.01it/s]
|
770 |
73%|ββββββββ | 8046/11074 [1:08:45<25:00, 2.02it/s]
|
771 |
73%|ββββββββ | 8047/11074 [1:08:46<24:58, 2.02it/s]
|
772 |
73%|ββββββββ | 8048/11074 [1:08:46<24:57, 2.02it/s]
|
773 |
73%|ββββββββ | 8049/11074 [1:08:47<24:57, 2.02it/s]
|
774 |
73%|ββββββββ | 8050/11074 [1:08:47<24:55, 2.02it/s]
|
775 |
|
776 |
+
|
777 |
73%|ββββββββ | 8050/11074 [1:08:47<24:55, 2.02it/s]
|
778 |
73%|ββββββββ | 8051/11074 [1:08:48<24:56, 2.02it/s]
|
779 |
73%|ββββββββ | 8052/11074 [1:08:48<24:55, 2.02it/s]
|
780 |
73%|ββββββββ | 8053/11074 [1:08:49<24:55, 2.02it/s]
|
781 |
73%|ββββββββ | 8054/11074 [1:08:49<24:53, 2.02it/s]
|
782 |
73%|ββββββββ | 8055/11074 [1:08:50<24:52, 2.02it/s]
|
783 |
73%|ββββββββ | 8056/11074 [1:08:50<24:52, 2.02it/s]
|
784 |
73%|ββββββββ | 8057/11074 [1:08:51<24:49, 2.03it/s]
|
785 |
73%|ββββββββ | 8058/11074 [1:08:51<24:49, 2.02it/s]
|
786 |
73%|ββββββββ | 8059/11074 [1:08:52<24:48, 2.02it/s]
|
787 |
73%|ββββββββ | 8060/11074 [1:08:52<24:48, 2.02it/s]
|
788 |
73%|ββββββββ | 8061/11074 [1:08:53<24:47, 2.03it/s]
|
789 |
73%|ββββββββ | 8062/11074 [1:08:53<24:47, 2.02it/s]
|
790 |
73%|ββββββββ | 8063/11074 [1:08:54<24:47, 2.02it/s]
|
791 |
73%|ββββββββ | 8064/11074 [1:08:54<24:44, 2.03it/s]
|
792 |
73%|ββββββββ | 8065/11074 [1:08:55<24:45, 2.03it/s]
|
793 |
73%|ββββββββ | 8066/11074 [1:08:55<24:43, 2.03it/s]
|
794 |
73%|ββββββββ | 8067/11074 [1:08:56<24:44, 2.03it/s]
|
795 |
73%|ββββββββ | 8068/11074 [1:08:56<24:43, 2.03it/s]
|
796 |
73%|ββββββββ | 8069/11074 [1:08:57<24:43, 2.03it/s]
|
797 |
73%|ββββββββ | 8070/11074 [1:08:57<24:42, 2.03it/s]
|
798 |
73%|ββββββββ | 8071/11074 [1:08:58<24:42, 2.03it/s]
|
799 |
73%|ββββββββ | 8072/11074 [1:08:58<24:44, 2.02it/s]
|
800 |
73%|ββββββββ | 8073/11074 [1:08:59<24:44, 2.02it/s]
|
801 |
73%|ββββββββ | 8074/11074 [1:08:59<24:43, 2.02it/s]
|
802 |
73%|ββββββββ | 8075/11074 [1:09:00<24:43, 2.02it/s]
|
803 |
|
804 |
+
|
805 |
73%|ββββββββ | 8075/11074 [1:09:00<24:43, 2.02it/s]
|
806 |
73%|ββββββββ | 8076/11074 [1:09:00<24:43, 2.02it/s]
|
807 |
73%|ββββββββ | 8077/11074 [1:09:01<24:42, 2.02it/s]
|
808 |
73%|ββββββββ | 8078/11074 [1:09:01<24:41, 2.02it/s]
|
809 |
73%|ββββββββ | 8079/11074 [1:09:02<24:38, 2.03it/s]
|
810 |
73%|ββββββββ | 8080/11074 [1:09:02<24:38, 2.02it/s]
|
811 |
73%|ββββββββ | 8081/11074 [1:09:03<24:35, 2.03it/s]
|
812 |
73%|ββββββββ | 8082/11074 [1:09:03<24:37, 2.03it/s]
|
813 |
73%|ββββββββ | 8083/11074 [1:09:04<24:36, 2.03it/s]
|
814 |
73%|ββββββββ | 8084/11074 [1:09:04<24:35, 2.03it/s]
|
815 |
73%|ββββββββ | 8085/11074 [1:09:05<24:34, 2.03it/s]
|
816 |
73%|ββββββββ | 8086/11074 [1:09:05<24:33, 2.03it/s]
|
817 |
73%|ββββββββ | 8087/11074 [1:09:06<24:34, 2.03it/s]
|
818 |
73%|ββββββββ | 8088/11074 [1:09:06<24:31, 2.03it/s]
|
819 |
73%|ββββββββ | 8089/11074 [1:09:07<24:32, 2.03it/s]
|
820 |
73%|ββββββββ | 8090/11074 [1:09:07<24:31, 2.03it/s]
|
821 |
73%|ββββββββ | 8091/11074 [1:09:08<24:32, 2.03it/s]
|
822 |
73%|ββββββββ | 8092/11074 [1:09:08<24:31, 2.03it/s]
|
823 |
73%|ββββββββ | 8093/11074 [1:09:09<24:31, 2.03it/s]
|
824 |
73%|ββββββββ | 8094/11074 [1:09:09<24:31, 2.03it/s]
|
825 |
73%|ββββββββ | 8095/11074 [1:09:10<24:31, 2.02it/s]
|
826 |
73%|ββββββββ | 8096/11074 [1:09:10<24:30, 2.02it/s]
|
827 |
73%|ββββββββ | 8097/11074 [1:09:11<24:31, 2.02it/s]
|
828 |
73%|ββββββββ | 8098/11074 [1:09:11<24:30, 2.02it/s]
|
829 |
73%|ββββββββ | 8099/11074 [1:09:12<24:28, 2.03it/s]
|
830 |
73%|ββββββββ | 8100/11074 [1:09:12<24:29, 2.02it/s]{'loss': 3.2194, 'grad_norm': 0.23723545670509338, 'learning_rate': 0.00020409631196450457, 'epoch': 10.23}
|
831 |
+
|
832 |
|
833 |
73%|ββββββββ | 8100/11074 [1:09:12<24:29, 2.02it/s]
|
834 |
73%|ββββββββ | 8101/11074 [1:09:13<24:28, 2.02it/s]
|
835 |
73%|ββββββββ | 8102/11074 [1:09:13<24:28, 2.02it/s]
|
836 |
73%|ββββββββ | 8103/11074 [1:09:14<24:26, 2.03it/s]
|
837 |
73%|ββββββββ | 8104/11074 [1:09:14<24:26, 2.03it/s]
|
838 |
73%|ββββββββ | 8105/11074 [1:09:15<24:24, 2.03it/s]
|
839 |
73%|ββββββββ | 8106/11074 [1:09:15<24:25, 2.03it/s]
|
840 |
73%|ββββββββ | 8107/11074 [1:09:16<24:25, 2.02it/s]
|
841 |
73%|ββββββββ | 8108/11074 [1:09:16<24:24, 2.03it/s]
|
842 |
73%|ββββββββ | 8109/11074 [1:09:16<24:24, 2.02it/s]
|
843 |
73%|ββββββββ | 8110/11074 [1:09:17<24:22, 2.03it/s]
|
844 |
73%|ββββββββ | 8111/11074 [1:09:17<24:23, 2.03it/s]
|
845 |
73%|ββββββββ | 8112/11074 [1:09:18<24:21, 2.03it/s]
|
846 |
73%|ββββββββ | 8113/11074 [1:09:18<24:22, 2.03it/s]
|
847 |
73%|ββββββββ | 8114/11074 [1:09:19<24:20, 2.03it/s]
|
848 |
73%|ββββββββ | 8115/11074 [1:09:19<24:19, 2.03it/s]
|
849 |
73%|ββββββββ | 8116/11074 [1:09:20<24:20, 2.03it/s]
|
850 |
73%|ββββββββ | 8117/11074 [1:09:20<24:17, 2.03it/s]
|
851 |
73%|ββββββββ | 8118/11074 [1:09:21<24:19, 2.03it/s]
|
852 |
73%|ββββββββ | 8119/11074 [1:09:21<24:17, 2.03it/s]
|
853 |
73%|ββββββββ | 8120/11074 [1:09:22<24:17, 2.03it/s]
|
854 |
73%|ββββββββ | 8121/11074 [1:09:22<24:17, 2.03it/s]
|
855 |
73%|ββββββββ | 8122/11074 [1:09:23<24:17, 2.03it/s]
|
856 |
73%|ββββββββ | 8123/11074 [1:09:23<24:17, 2.02it/s]
|
857 |
73%|ββββββββ | 8124/11074 [1:09:24<24:15, 2.03it/s]
|
858 |
73%|ββββββββ | 8125/11074 [1:09:24<24:16, 2.02it/s]
|
859 |
|
860 |
+
|
861 |
73%|ββββββββ | 8125/11074 [1:09:24<24:16, 2.02it/s]
|
862 |
73%|ββββββββ | 8126/11074 [1:09:25<24:16, 2.02it/s]
|
863 |
73%|ββββββββ | 8127/11074 [1:09:25<24:18, 2.02it/s]
|
864 |
73%|ββββββββ | 8128/11074 [1:09:26<24:15, 2.02it/s]
|
865 |
73%|ββββββββ | 8129/11074 [1:09:26<24:14, 2.02it/s]
|
866 |
73%|ββββββββ | 8130/11074 [1:09:27<24:13, 2.03it/s]
|
867 |
73%|ββββββββ | 8131/11074 [1:09:27<24:14, 2.02it/s]
|
868 |
73%|ββββββββ | 8132/11074 [1:09:28<24:13, 2.02it/s]
|
869 |
73%|ββββββββ | 8133/11074 [1:09:28<24:14, 2.02it/s]
|
870 |
73%|ββββββββ | 8134/11074 [1:09:29<24:13, 2.02it/s]
|
871 |
73%|ββββββββ | 8135/11074 [1:09:29<24:12, 2.02it/s]
|
872 |
73%|ββββββββ | 8136/11074 [1:09:30<24:11, 2.02it/s]
|
873 |
73%|ββββββββ | 8137/11074 [1:09:30<24:11, 2.02it/s]
|
874 |
73%|ββββββββ | 8138/11074 [1:09:31<24:11, 2.02it/s]
|
875 |
73%|ββββββββ | 8139/11074 [1:09:31<24:10, 2.02it/s]
|
876 |
74%|ββββββββ | 8140/11074 [1:09:32<24:10, 2.02it/s]
|
877 |
74%|ββββββββ | 8141/11074 [1:09:32<24:10, 2.02it/s]
|
878 |
74%|ββββββββ | 8142/11074 [1:09:33<24:08, 2.02it/s]
|
879 |
74%|ββββββββ | 8143/11074 [1:09:33<24:07, 2.02it/s]
|
880 |
74%|ββββββββ | 8144/11074 [1:09:34<24:06, 2.03it/s]
|
881 |
74%|ββββββββ | 8145/11074 [1:09:34<24:06, 2.03it/s]
|
882 |
74%|ββββββββ | 8146/11074 [1:09:35<24:05, 2.03it/s]
|
883 |
74%|ββββββββ | 8147/11074 [1:09:35<24:05, 2.03it/s]
|
884 |
74%|ββββββββ | 8148/11074 [1:09:36<24:05, 2.02it/s]
|
885 |
74%|ββββββββ | 8149/11074 [1:09:36<24:04, 2.02it/s]
|
886 |
74%|ββββββββ | 8150/11074 [1:09:37<24:04, 2.02it/s]
|
887 |
{'loss': 3.2225, 'grad_norm': 0.23790352046489716, 'learning_rate': 0.00019778079782860702, 'epoch': 10.3}
|
888 |
+
|
889 |
74%|ββββββββ | 8150/11074 [1:09:37<24:04, 2.02it/s]
|
890 |
74%|ββββββββ | 8151/11074 [1:09:37<24:05, 2.02it/s]
|
891 |
74%|ββββββββ | 8152/11074 [1:09:38<24:05, 2.02it/s]
|
892 |
74%|ββββββββ | 8153/11074 [1:09:38<24:04, 2.02it/s]
|
893 |
74%|ββββββββ | 8154/11074 [1:09:39<24:04, 2.02it/s]
|
894 |
74%|ββββββββ | 8155/11074 [1:09:39<24:03, 2.02it/s]
|
895 |
74%|ββββββββ | 8156/11074 [1:09:40<24:01, 2.02it/s]
|
896 |
74%|ββββββββ | 8157/11074 [1:09:40<24:00, 2.03it/s]
|
897 |
74%|ββββββββ | 8158/11074 [1:09:41<23:59, 2.03it/s]
|
898 |
74%|ββββββββ | 8159/11074 [1:09:41<23:58, 2.03it/s]
|
899 |
74%|ββββββββ | 8160/11074 [1:09:42<23:58, 2.03it/s]
|
900 |
74%|ββββββββ | 8161/11074 [1:09:42<23:56, 2.03it/s]
|
901 |
74%|ββββββββ | 8162/11074 [1:09:43<23:56, 2.03it/s]
|
902 |
74%|ββββββββ | 8163/11074 [1:09:43<23:55, 2.03it/s]
|
903 |
74%|ββββββββ | 8164/11074 [1:09:44<23:55, 2.03it/s]
|
904 |
74%|ββββββββ | 8165/11074 [1:09:44<23:55, 2.03it/s]
|
905 |
74%|ββββββββ | 8166/11074 [1:09:45<23:54, 2.03it/s]
|
906 |
74%|ββββββββ | 8167/11074 [1:09:45<23:55, 2.03it/s]
|
907 |
74%|ββββββββ | 8168/11074 [1:09:46<23:53, 2.03it/s]
|
908 |
74%|ββββββββ | 8169/11074 [1:09:46<23:53, 2.03it/s]
|
909 |
74%|ββββββββ | 8170/11074 [1:09:47<23:51, 2.03it/s]
|
910 |
74%|ββββββββ | 8171/11074 [1:09:47<23:49, 2.03it/s]
|
911 |
74%|ββββββββ | 8172/11074 [1:09:48<23:51, 2.03it/s]
|
912 |
74%|ββββββββ | 8173/11074 [1:09:48<23:49, 2.03it/s]
|
913 |
74%|ββββββββ | 8174/11074 [1:09:49<23:49, 2.03it/s]
|
914 |
74%|ββββββββ | 8175/11074 [1:09:49<23:49, 2.03it/s]
|
915 |
{'loss': 3.2276, 'grad_norm': 0.2349643111228943, 'learning_rate': 0.00019465109757138034, 'epoch': 10.33}
|
916 |
+
|
917 |
74%|ββββββββ | 8175/11074 [1:09:49<23:49, 2.03it/s]
|
918 |
74%|ββββββββ | 8176/11074 [1:09:50<23:50, 2.03it/s]
|
919 |
74%|ββββββββ | 8177/11074 [1:09:50<23:51, 2.02it/s]
|
920 |
74%|ββββββββ | 8178/11074 [1:09:51<23:51, 2.02it/s]
|
921 |
74%|ββββββββ | 8179/11074 [1:09:51<23:50, 2.02it/s]
|
922 |
74%|ββββββββ | 8180/11074 [1:09:52<23:48, 2.03it/s]
|
923 |
74%|ββββββββ | 8181/11074 [1:09:52<23:48, 2.02it/s]
|
924 |
74%|ββββββββ | 8182/11074 [1:09:53<23:47, 2.03it/s]
|
925 |
74%|ββββββββ | 8183/11074 [1:09:53<23:47, 2.03it/s]
|
926 |
74%|ββββββββ | 8184/11074 [1:09:54<23:47, 2.02it/s]
|
927 |
74%|ββββββββ | 8185/11074 [1:09:54<23:47, 2.02it/s]
|
928 |
74%|ββββββββ | 8186/11074 [1:09:55<23:46, 2.02it/s]
|
929 |
74%|ββββββββ | 8187/11074 [1:09:55<23:45, 2.03it/s]
|
930 |
74%|ββββββββ | 8188/11074 [1:09:56<23:43, 2.03it/s]
|
931 |
74%|ββββββββ | 8189/11074 [1:09:56<23:45, 2.02it/s]
|
932 |
74%|ββββββββ | 8190/11074 [1:09:56<23:44, 2.03it/s]
|
933 |
74%|ββββββββ | 8191/11074 [1:09:57<23:43, 2.03it/s]
|
934 |
74%|ββββββββ | 8192/11074 [1:09:57<23:43, 2.02it/s]
|
935 |
74%|ββββββββ | 8193/11074 [1:09:58<23:44, 2.02it/s]
|
936 |
74%|ββββββββ | 8194/11074 [1:09:58<23:43, 2.02it/s]
|
937 |
74%|ββββββββ | 8195/11074 [1:09:59<23:41, 2.03it/s]
|
938 |
74%|ββββββββ | 8196/11074 [1:09:59<23:41, 2.02it/s]
|
939 |
74%|ββββββββ | 8197/11074 [1:10:00<23:39, 2.03it/s]
|
940 |
74%|ββββββββ | 8198/11074 [1:10:00<23:39, 2.03it/s]
|
941 |
74%|ββββββββ | 8199/11074 [1:10:01<23:39, 2.03it/s]
|
942 |
74%|ββββββββ | 8200/11074 [1:10:01<23:39, 2.02it/s]
|
943 |
|
944 |
+
|
945 |
74%|ββββββββ | 8200/11074 [1:10:01<23:39, 2.02it/s]
|
946 |
74%|ββββββββ | 8201/11074 [1:10:02<23:39, 2.02it/s]
|
947 |
74%|ββββββββ | 8202/11074 [1:10:02<23:39, 2.02it/s]
|
948 |
74%|ββββββββ | 8203/11074 [1:10:03<23:38, 2.02it/s]
|
949 |
74%|ββββββββ | 8204/11074 [1:10:03<23:38, 2.02it/s]
|
950 |
74%|ββββββββ | 8205/11074 [1:10:04<23:37, 2.02it/s]
|
951 |
74%|ββββββββ | 8206/11074 [1:10:04<23:34, 2.03it/s]
|
952 |
74%|ββββββββ | 8207/11074 [1:10:05<23:35, 2.03it/s]
|
953 |
74%|ββββββββ | 8208/11074 [1:10:05<23:32, 2.03it/s]
|
954 |
74%|ββββββββ | 8209/11074 [1:10:06<23:33, 2.03it/s]
|
955 |
74%|ββββββββ | 8210/11074 [1:10:06<23:32, 2.03it/s]
|
956 |
74%|ββββββββ | 8211/11074 [1:10:07<23:32, 2.03it/s]
|
957 |
74%|ββββββββ | 8212/11074 [1:10:07<23:33, 2.03it/s]
|
958 |
74%|ββββββββ | 8213/11074 [1:10:08<23:32, 2.03it/s]
|
959 |
74%|ββββββββ | 8214/11074 [1:10:08<23:32, 2.02it/s]
|
960 |
74%|ββββββββ | 8215/11074 [1:10:09<23:33, 2.02it/s]
|
961 |
74%|ββββββββ | 8216/11074 [1:10:09<23:32, 2.02it/s]
|
962 |
74%|ββββββββ | 8217/11074 [1:10:10<23:31, 2.02it/s]
|
963 |
74%|ββββββββ | 8218/11074 [1:10:10<23:31, 2.02it/s]
|
964 |
74%|ββββββββ | 8219/11074 [1:10:11<23:29, 2.03it/s]
|
965 |
74%|βββββοΏ½οΏ½ββ | 8220/11074 [1:10:11<23:30, 2.02it/s]
|
966 |
74%|ββββββββ | 8221/11074 [1:10:12<23:29, 2.02it/s]
|
967 |
74%|ββββββββ | 8222/11074 [1:10:12<23:28, 2.02it/s]
|
968 |
74%|ββββββββ | 8223/11074 [1:10:13<23:27, 2.03it/s]
|
969 |
74%|ββββββββ | 8224/11074 [1:10:13<23:27, 2.02it/s]
|
970 |
74%|ββββββββ | 8225/11074 [1:10:14<23:27, 2.02it/s]{'loss': 3.2261, 'grad_norm': 0.24224504828453064, 'learning_rate': 0.00018844878253702113, 'epoch': 10.39}
|
971 |
|
972 |
+
|
973 |
74%|ββββββββ | 8225/11074 [1:10:14<23:27, 2.02it/s]
|
974 |
74%|ββββββββ | 8226/11074 [1:10:14<23:29, 2.02it/s]
|
975 |
74%|ββββββββ | 8227/11074 [1:10:15<23:26, 2.02it/s]
|
976 |
74%|ββββββββ | 8228/11074 [1:10:15<23:27, 2.02it/s]
|
977 |
74%|ββββββββ | 8229/11074 [1:10:16<23:24, 2.02it/s]
|
978 |
74%|ββββββββ | 8230/11074 [1:10:16<23:25, 2.02it/s]
|
979 |
74%|ββββββββ | 8231/11074 [1:10:17<23:25, 2.02it/s]
|
980 |
74%|ββββββββ | 8232/11074 [1:10:17<23:24, 2.02it/s]
|
981 |
74%|ββββββββ | 8233/11074 [1:10:18<23:23, 2.02it/s]
|
982 |
74%|ββββββββ | 8234/11074 [1:10:18<23:23, 2.02it/s]
|
983 |
74%|ββββββββ | 8235/11074 [1:10:19<23:22, 2.02it/s]
|
984 |
74%|ββββββββ | 8236/11074 [1:10:19<23:21, 2.03it/s]
|
985 |
74%|ββββββββ | 8237/11074 [1:10:20<23:21, 2.02it/s]
|
986 |
74%|ββββββββ | 8238/11074 [1:10:20<23:19, 2.03it/s]
|
987 |
74%|ββββββββ | 8239/11074 [1:10:21<23:20, 2.02it/s]
|
988 |
74%|ββββββββ | 8240/11074 [1:10:21<23:18, 2.03it/s]
|
989 |
74%|ββββββββ | 8241/11074 [1:10:22<23:20, 2.02it/s]
|
990 |
74%|ββββββββ | 8242/11074 [1:10:22<23:18, 2.03it/s]
|
991 |
74%|ββββββββ | 8243/11074 [1:10:23<23:18, 2.02it/s]
|
992 |
74%|ββββββββ | 8244/11074 [1:10:23<23:16, 2.03it/s]
|
993 |
74%|ββββββββ | 8245/11074 [1:10:24<23:15, 2.03it/s]
|
994 |
74%|ββββββββ | 8246/11074 [1:10:24<25:18, 1.86it/s]
|
995 |
74%|ββββββββ | 8247/11074 [1:10:25<24:41, 1.91it/s]
|
996 |
74%|ββββββββ | 8248/11074 [1:10:25<24:15, 1.94it/s]
|
997 |
74%|ββββββββ | 8249/11074 [1:10:26<23:56, 1.97it/s]
|
998 |
74%|ββββββββ | 8250/11074 [1:10:26<23:44, 1.98it/s]{'loss': 3.2294, 'grad_norm': 0.24246717989444733, 'learning_rate': 0.00018537655296280926, 'epoch': 10.42}
|
999 |
+
|
1000 |
|
1001 |
74%|ββββββββ | 8250/11074 [1:10:26<23:44, 1.98it/s]
|
1002 |
75%|ββββββββ | 8251/11074 [1:10:27<23:37, 1.99it/s]
|
1003 |
75%|ββββββββ | 8252/11074 [1:10:27<23:31, 2.00it/s]
|
1004 |
75%|ββββββββ | 8253/11074 [1:10:28<23:23, 2.01it/s]
|
1005 |
75%|ββββββββ | 8254/11074 [1:10:28<23:22, 2.01it/s]
|
1006 |
75%|ββββββββ | 8255/11074 [1:10:29<23:16, 2.02it/s]
|
1007 |
75%|ββββββββ | 8256/11074 [1:10:29<23:14, 2.02it/s]
|
1008 |
75%|ββββββββ | 8257/11074 [1:10:30<23:12, 2.02it/s]
|
1009 |
75%|ββββββββ | 8258/11074 [1:10:30<23:12, 2.02it/s]
|
1010 |
75%|ββββββββ | 8259/11074 [1:10:31<23:12, 2.02it/s]
|
1011 |
75%|ββββββββ | 8260/11074 [1:10:31<23:09, 2.02it/s]
|
1012 |
75%|ββββββββ | 8261/11074 [1:10:32<23:09, 2.02it/s]
|
1013 |
75%|ββββββββ | 8262/11074 [1:10:32<23:07, 2.03it/s]
|
1014 |
75%|ββββββββ | 8263/11074 [1:10:33<23:08, 2.02it/s]
|
1015 |
75%|ββββββββ | 8264/11074 [1:10:33<23:06, 2.03it/s]
|
1016 |
75%|ββββββββ | 8265/11074 [1:10:34<23:06, 2.03it/s]
|
1017 |
75%|ββββββββ | 8266/11074 [1:10:34<23:07, 2.02it/s]
|
1018 |
75%|ββββββββ | 8267/11074 [1:10:35<23:06, 2.03it/s]
|
1019 |
75%|ββββββββ | 8268/11074 [1:10:35<23:05, 2.03it/s]
|
1020 |
75%|ββββββββ | 8269/11074 [1:10:36<23:04, 2.03it/s]
|
1021 |
75%|ββββββββ | 8270/11074 [1:10:36<23:04, 2.03it/s]
|
1022 |
75%|ββββββββ | 8271/11074 [1:10:37<23:04, 2.03it/s]
|
1023 |
75%|ββββββββ | 8272/11074 [1:10:37<23:03, 2.02it/s]
|
1024 |
75%|ββββββββ | 8273/11074 [1:10:38<23:02, 2.03it/s]
|
1025 |
75%|ββββββββ | 8274/11074 [1:10:38<23:01, 2.03it/s]
|
1026 |
75%|ββββββββ | 8275/11074 [1:10:39<23:00, 2.03it/s]{'loss': 3.2352, 'grad_norm': 0.23462679982185364, 'learning_rate': 0.00018232386349094988, 'epoch': 10.45}
|
1027 |
+
|
1028 |
|
1029 |
75%|ββββββββ | 8275/11074 [1:10:39<23:00, 2.03it/s]
|
1030 |
75%|ββββββββ | 8276/11074 [1:10:39<23:02, 2.02it/s]
|
1031 |
75%|ββββββββ | 8277/11074 [1:10:40<23:02, 2.02it/s]
|
1032 |
75%|ββββββββ | 8278/11074 [1:10:40<23:01, 2.02it/s]
|
1033 |
75%|ββββββββ | 8279/11074 [1:10:41<23:00, 2.02it/s]
|
1034 |
75%|ββββββββ | 8280/11074 [1:10:41<22:59, 2.02it/s]
|
1035 |
75%|ββββββββ | 8281/11074 [1:10:42<22:58, 2.03it/s]
|
1036 |
75%|ββββββββ | 8282/11074 [1:10:42<22:58, 2.02it/s]
|
1037 |
75%|ββββββββ | 8283/11074 [1:10:43<22:57, 2.03it/s]
|
1038 |
75%|ββββββββ | 8284/11074 [1:10:43<22:56, 2.03it/s]
|
1039 |
75%|ββββββββ | 8285/11074 [1:10:44<22:56, 2.03it/s]
|
1040 |
75%|ββββββββ | 8286/11074 [1:10:44<22:56, 2.02it/s]
|
1041 |
75%|ββββββββ | 8287/11074 [1:10:45<22:56, 2.02it/s]
|
1042 |
75%|ββββββββ | 8288/11074 [1:10:45<22:56, 2.02it/s]
|
1043 |
75%|ββββββββ | 8289/11074 [1:10:46<22:54, 2.03it/s]
|
1044 |
75%|ββββββββ | 8290/11074 [1:10:46<22:54, 2.03it/s]
|
1045 |
75%|ββββββββ | 8291/11074 [1:10:47<22:52, 2.03it/s]
|
1046 |
75%|ββββββββ | 8292/11074 [1:10:47<22:53, 2.03it/s]
|
1047 |
75%|ββββββββ | 8293/11074 [1:10:48<22:51, 2.03it/s]
|
1048 |
75%|ββββββββ | 8294/11074 [1:10:48<22:54, 2.02it/s]
|
1049 |
75%|ββββββββ | 8295/11074 [1:10:48<22:52, 2.02it/s]
|
1050 |
75%|ββββββββ | 8296/11074 [1:10:49<22:51, 2.03it/s]
|
1051 |
75%|ββββββββ | 8297/11074 [1:10:49<22:49, 2.03it/s]
|
1052 |
75%|ββββββββ | 8298/11074 [1:10:50<22:47, 2.03it/s]
|
1053 |
75%|ββββββββ | 8299/11074 [1:10:50<22:49, 2.03it/s]
|
1054 |
75%|ββββββββ | 8300/11074 [1:10:51<22:48, 2.03it/s]
|
1055 |
|
1056 |
+
|
1057 |
75%|ββββββββ | 8300/11074 [1:10:51<22:48, 2.03it/s]
|
1058 |
75%|ββββββββ | 8301/11074 [1:10:51<22:50, 2.02it/s]
|
1059 |
75%|ββββββββ | 8302/11074 [1:10:52<22:49, 2.02it/s]
|
1060 |
75%|ββββββββ | 8303/11074 [1:10:52<22:50, 2.02it/s]
|
1061 |
75%|ββββββββ | 8304/11074 [1:10:53<22:49, 2.02it/s]
|
1062 |
75%|ββββββββ | 8305/11074 [1:10:53<22:48, 2.02it/s]
|
1063 |
75%|ββββββββ | 8306/11074 [1:10:54<22:47, 2.02it/s]
|
1064 |
75%|ββββββββ | 8307/11074 [1:10:54<22:48, 2.02it/s]
|
1065 |
75%|ββββββββ | 8308/11074 [1:10:55<22:46, 2.02it/s]
|
1066 |
75%|ββββββββ | 8309/11074 [1:10:55<22:46, 2.02it/s]
|
1067 |
75%|ββββββββ | 8310/11074 [1:10:56<22:44, 2.03it/s]
|
1068 |
75%|ββββββββ | 8311/11074 [1:10:56<22:44, 2.03it/s]
|
1069 |
75%|ββββββββ | 8312/11074 [1:10:57<22:43, 2.03it/s]
|
1070 |
75%|ββββββββ | 8313/11074 [1:10:57<22:43, 2.02it/s]
|
1071 |
75%|ββββββββ | 8314/11074 [1:10:58<22:43, 2.02it/s]
|
1072 |
75%|ββββββββ | 8315/11074 [1:10:58<22:42, 2.02it/s]
|
1073 |
75%|ββββββββ | 8316/11074 [1:10:59<22:41, 2.03it/s]
|
1074 |
75%|ββββββββ | 8317/11074 [1:10:59<22:41, 2.03it/s]
|
1075 |
75%|ββββββββ | 8318/11074 [1:11:00<22:39, 2.03it/s]
|
1076 |
75%|ββββββββ | 8319/11074 [1:11:00<22:39, 2.03it/s]
|
1077 |
75%|ββββββββ | 8320/11074 [1:11:01<22:39, 2.03it/s]
|
1078 |
75%|ββββββββ | 8321/11074 [1:11:01<22:37, 2.03it/s]
|
1079 |
75%|ββββββββ | 8322/11074 [1:11:02<22:38, 2.03it/s]
|
1080 |
75%|ββββββββ | 8323/11074 [1:11:02<22:36, 2.03it/s]
|
1081 |
75%|ββββββββ | 8324/11074 [1:11:03<22:36, 2.03it/s]
|
1082 |
75%|ββββββββ | 8325/11074 [1:11:03<22:35, 2.03it/s]
|
1083 |
{'loss': 3.244, 'grad_norm': 0.24260075390338898, 'learning_rate': 0.0001762778619941043, 'epoch': 10.52}
|
1084 |
+
|
1085 |
75%|ββββββββ | 8325/11074 [1:11:03<22:35, 2.03it/s]
|
1086 |
75%|ββββββββ | 8326/11074 [1:11:04<24:35, 1.86it/s]
|
1087 |
75%|ββββββββ | 8327/11074 [1:11:04<23:58, 1.91it/s]
|
1088 |
75%|ββββββββ | 8328/11074 [1:11:05<23:32, 1.94it/s]
|
1089 |
75%|ββββββββ | 8329/11074 [1:11:05<23:14, 1.97it/s]
|
1090 |
75%|ββββββββ | 8330/11074 [1:11:06<23:02, 1.99it/s]
|
1091 |
75%|ββββββββ | 8331/11074 [1:11:06<22:53, 2.00it/s]
|
1092 |
75%|ββββββββ | 8332/11074 [1:11:07<22:47, 2.00it/s]
|
1093 |
75%|ββββββββ | 8333/11074 [1:11:07<22:42, 2.01it/s]
|
1094 |
75%|ββββββββ | 8334/11074 [1:11:08<22:37, 2.02it/s]
|
1095 |
75%|ββββββββ | 8335/11074 [1:11:08<22:36, 2.02it/s]
|
1096 |
75%|ββββββββ | 8336/11074 [1:11:09<22:35, 2.02it/s]
|
1097 |
75%|ββββββββ | 8337/11074 [1:11:09<22:34, 2.02it/s]
|
1098 |
75%|ββββββββ | 8338/11074 [1:11:10<22:32, 2.02it/s]
|
1099 |
75%|ββββββββ | 8339/11074 [1:11:10<22:32, 2.02it/s]
|
1100 |
75%|ββββββββ | 8340/11074 [1:11:11<22:31, 2.02it/s]
|
1101 |
75%|ββββββββ | 8341/11074 [1:11:11<22:31, 2.02it/s]
|
1102 |
75%|ββββββββ | 8342/11074 [1:11:12<22:29, 2.02it/s]
|
1103 |
75%|ββββββββ | 8343/11074 [1:11:12<22:29, 2.02it/s]
|
1104 |
75%|ββββββββ | 8344/11074 [1:11:13<22:27, 2.03it/s]
|
1105 |
75%|ββββββββ | 8345/11074 [1:11:13<22:26, 2.03it/s]
|
1106 |
75%|ββββββββ | 8346/11074 [1:11:14<22:27, 2.03it/s]
|
1107 |
75%|ββββββββ | 8347/11074 [1:11:14<22:24, 2.03it/s]
|
1108 |
75%|ββββββββ | 8348/11074 [1:11:15<22:25, 2.03it/s]
|
1109 |
75%|ββββββββ | 8349/11074 [1:11:15<22:24, 2.03it/s]
|
1110 |
75%|ββββββββ | 8350/11074 [1:11:16<22:24, 2.03it/s]{'loss': 3.2382, 'grad_norm': 0.23930463194847107, 'learning_rate': 0.0001732849254639809, 'epoch': 10.55}
|
1111 |
|
1112 |
+
|
1113 |
75%|ββββββββ | 8350/11074 [1:11:16<22:24, 2.03it/s]
|
1114 |
75%|ββββββββ | 8351/11074 [1:11:16<22:25, 2.02it/s]
|
1115 |
75%|ββββββββ | 8352/11074 [1:11:17<22:26, 2.02it/s]
|
1116 |
75%|ββββββββ | 8353/11074 [1:11:17<22:25, 2.02it/s]
|
1117 |
75%|ββββββββ | 8354/11074 [1:11:18<22:24, 2.02it/s]
|
1118 |
75%|ββββββββ | 8355/11074 [1:11:18<22:22, 2.03it/s]
|
1119 |
75%|ββββββββ | 8356/11074 [1:11:19<22:22, 2.02it/s]
|
1120 |
75%|ββββββββ | 8357/11074 [1:11:19<22:21, 2.03it/s]
|
1121 |
75%|ββββββββ | 8358/11074 [1:11:20<22:19, 2.03it/s]
|
1122 |
75%|ββββββββ | 8359/11074 [1:11:20<22:19, 2.03it/s]
|
1123 |
75%|ββββββββ | 8360/11074 [1:11:21<22:17, 2.03it/s]
|
1124 |
76%|ββββββββ | 8361/11074 [1:11:21<22:17, 2.03it/s]
|
1125 |
76%|ββββββββ | 8362/11074 [1:11:22<22:17, 2.03it/s]
|
1126 |
76%|ββββββββ | 8363/11074 [1:11:22<22:18, 2.03it/s]
|
1127 |
76%|ββββββββ | 8364/11074 [1:11:23<22:17, 2.03it/s]
|
1128 |
76%|ββββββββ | 8365/11074 [1:11:23<22:18, 2.02it/s]
|
1129 |
76%|ββββββββ | 8366/11074 [1:11:24<22:15, 2.03it/s]
|
1130 |
76%|ββββββββ | 8367/11074 [1:11:24<22:16, 2.03it/s]
|
1131 |
76%|ββββββββ | 8368/11074 [1:11:25<22:16, 2.02it/s]
|
1132 |
76%|ββββββββ | 8369/11074 [1:11:25<22:16, 2.02it/s]
|
1133 |
76%|ββββββββ | 8370/11074 [1:11:26<22:16, 2.02it/s]
|
1134 |
76%|ββββββββ | 8371/11074 [1:11:26<22:15, 2.02it/s]
|
1135 |
76%|ββββββββ | 8372/11074 [1:11:27<22:15, 2.02it/s]
|
1136 |
76%|ββββββββ | 8373/11074 [1:11:27<22:14, 2.02it/s]
|
1137 |
76%|ββββββββ | 8374/11074 [1:11:28<22:15, 2.02it/s]
|
1138 |
76%|ββββββββ | 8375/11074 [1:11:28<22:13, 2.02it/s]{'loss': 3.2304, 'grad_norm': 0.23772986233234406, 'learning_rate': 0.000170312280002615, 'epoch': 10.58}
|
1139 |
|
1140 |
+
|
1141 |
76%|ββββββββ | 8375/11074 [1:11:28<22:13, 2.02it/s]
|
1142 |
76%|ββββββββ | 8376/11074 [1:11:29<22:15, 2.02it/s]
|
1143 |
76%|ββββββββ | 8377/11074 [1:11:29<22:13, 2.02it/s]
|
1144 |
76%|ββββββββ | 8378/11074 [1:11:30<22:13, 2.02it/s]
|
1145 |
76%|ββββββββ | 8379/11074 [1:11:30<22:11, 2.02it/s]
|
1146 |
76%|ββββββββ | 8380/11074 [1:11:31<22:12, 2.02it/s]
|
1147 |
76%|ββββββββ | 8381/11074 [1:11:31<22:11, 2.02it/s]
|
1148 |
76%|ββββββββ | 8382/11074 [1:11:32<22:11, 2.02it/s]
|
1149 |
76%|ββββββββ | 8383/11074 [1:11:32<22:09, 2.02it/s]
|
1150 |
76%|ββββββββ | 8384/11074 [1:11:33<22:09, 2.02it/s]
|
1151 |
76%|ββββββββ | 8385/11074 [1:11:33<22:07, 2.03it/s]
|
1152 |
76%|ββββββββ | 8386/11074 [1:11:34<22:07, 2.02it/s]
|
1153 |
76%|ββββββββ | 8387/11074 [1:11:34<22:07, 2.02it/s]
|
1154 |
76%|ββββββββ | 8388/11074 [1:11:35<22:06, 2.02it/s]
|
1155 |
76%|ββββββββ | 8389/11074 [1:11:35<22:06, 2.02it/s]
|
1156 |
76%|ββββββββ | 8390/11074 [1:11:36<22:04, 2.03it/s]
|
1157 |
76%|ββββββββ | 8391/11074 [1:11:36<22:05, 2.02it/s]
|
1158 |
76%|ββββββββ | 8392/11074 [1:11:37<22:03, 2.03it/s]
|
1159 |
76%|ββββββββ | 8393/11074 [1:11:37<22:04, 2.02it/s]
|
1160 |
76%|ββββββββ | 8394/11074 [1:11:38<22:02, 2.03it/s]
|
1161 |
76%|ββββββββ | 8395/11074 [1:11:38<22:02, 2.03it/s]
|
1162 |
76%|ββββββββ | 8396/11074 [1:11:39<22:01, 2.03it/s]
|
1163 |
76%|ββββββββ | 8397/11074 [1:11:39<22:00, 2.03it/s]
|
1164 |
76%|ββββββββ | 8398/11074 [1:11:39<22:01, 2.03it/s]
|
1165 |
76%|ββββββββ | 8399/11074 [1:11:40<22:00, 2.03it/s]
|
1166 |
76%|ββββββββ | 8400/11074 [1:11:40<22:00, 2.02it/s]{'loss': 3.2414, 'grad_norm': 0.23746074736118317, 'learning_rate': 0.00016736011023005699, 'epoch': 10.61}
|
1167 |
+
|
1168 |
|
1169 |
76%|ββββββββ | 8400/11074 [1:11:40<22:00, 2.02it/s]
|
1170 |
76%|ββββββββ | 8401/11074 [1:11:41<22:01, 2.02it/s]
|
1171 |
76%|ββββββββ | 8402/11074 [1:11:41<22:02, 2.02it/s]
|
1172 |
76%|ββββββββ | 8403/11074 [1:11:42<22:00, 2.02it/s]
|
1173 |
76%|ββββββββ | 8404/11074 [1:11:42<22:01, 2.02it/s]
|
1174 |
76%|ββββββββ | 8405/11074 [1:11:43<21:58, 2.02it/s]
|
1175 |
76%|ββββββββ | 8406/11074 [1:11:43<21:59, 2.02it/s]
|
1176 |
76%|ββββββββ | 8407/11074 [1:11:44<21:57, 2.02it/s]
|
1177 |
76%|ββββββββ | 8408/11074 [1:11:44<21:57, 2.02it/s]
|
1178 |
76%|ββββββββ | 8409/11074 [1:11:45<21:57, 2.02it/s]
|
1179 |
76%|βοΏ½οΏ½οΏ½ββββββ | 8410/11074 [1:11:45<21:57, 2.02it/s]
|
1180 |
76%|ββββββββ | 8411/11074 [1:11:46<21:57, 2.02it/s]
|
1181 |
76%|ββββββββ | 8412/11074 [1:11:46<21:56, 2.02it/s]
|
1182 |
76%|ββββββββ | 8413/11074 [1:11:47<21:56, 2.02it/s]
|
1183 |
76%|ββββββββ | 8414/11074 [1:11:47<21:55, 2.02it/s]
|
1184 |
76%|ββββββββ | 8415/11074 [1:11:48<21:55, 2.02it/s]
|
1185 |
76%|ββββββββ | 8416/11074 [1:11:48<21:54, 2.02it/s]
|
1186 |
76%|ββββββββ | 8417/11074 [1:11:49<21:54, 2.02it/s]
|
1187 |
76%|ββββββββ | 8418/11074 [1:11:49<21:53, 2.02it/s]
|
1188 |
76%|ββββββββ | 8419/11074 [1:11:50<21:54, 2.02it/s]
|
1189 |
76%|ββββββββ | 8420/11074 [1:11:50<21:53, 2.02it/s]
|
1190 |
76%|ββββββββ | 8421/11074 [1:11:51<21:51, 2.02it/s]
|
1191 |
76%|ββββββββ | 8422/11074 [1:11:51<21:51, 2.02it/s]
|
1192 |
76%|ββββββββ | 8423/11074 [1:11:52<21:51, 2.02it/s]
|
1193 |
76%|ββββββββ | 8424/11074 [1:11:52<21:52, 2.02it/s]
|
1194 |
76%|ββββββββ | 8425/11074 [1:11:53<21:50, 2.02it/s]{'loss': 3.2383, 'grad_norm': 0.23675471544265747, 'learning_rate': 0.0001644285994946871, 'epoch': 10.64}
|
1195 |
|
1196 |
+
|
1197 |
76%|ββββββββ | 8425/11074 [1:11:53<21:50, 2.02it/s]
|
1198 |
76%|ββββββββ | 8426/11074 [1:11:53<21:56, 2.01it/s]
|
1199 |
76%|ββββββββ | 8427/11074 [1:11:54<21:52, 2.02it/s]
|
1200 |
76%|ββββββββ | 8428/11074 [1:11:54<21:50, 2.02it/s]
|
1201 |
76%|ββββββββ | 8429/11074 [1:11:55<21:47, 2.02it/s]
|
1202 |
76%|ββββββββ | 8430/11074 [1:11:55<21:48, 2.02it/s]
|
1203 |
76%|ββββββββ | 8431/11074 [1:11:56<21:46, 2.02it/s]
|
1204 |
76%|ββββββββ | 8432/11074 [1:11:56<21:46, 2.02it/s]
|
1205 |
76%|ββββββββ | 8433/11074 [1:11:57<21:45, 2.02it/s]
|
1206 |
76%|ββββββββ | 8434/11074 [1:11:57<21:43, 2.03it/s]
|
1207 |
76%|ββββββββ | 8435/11074 [1:11:58<21:43, 2.02it/s]
|
1208 |
76%|ββββββββ | 8436/11074 [1:11:58<21:41, 2.03it/s]
|
1209 |
76%|ββββββββ | 8437/11074 [1:11:59<21:41, 2.03it/s]
|
1210 |
76%|ββββββββ | 8438/11074 [1:11:59<21:41, 2.03it/s]
|
1211 |
76%|ββββββββ | 8439/11074 [1:12:00<21:40, 2.03it/s]
|
1212 |
76%|ββββββββ | 8440/11074 [1:12:00<21:40, 2.02it/s]
|
1213 |
76%|ββββββββ | 8441/11074 [1:12:01<21:38, 2.03it/s]
|
1214 |
76%|ββββββββ | 8442/11074 [1:12:01<21:39, 2.03it/s]
|
1215 |
76%|ββββββββ | 8443/11074 [1:12:02<21:37, 2.03it/s]
|
1216 |
76%|ββββββββ | 8444/11074 [1:12:02<21:37, 2.03it/s]
|
1217 |
76%|ββββββββ | 8445/11074 [1:12:03<21:37, 2.03it/s]
|
1218 |
76%|ββββββββ | 8446/11074 [1:12:03<21:38, 2.02it/s]
|
1219 |
76%|ββββββββ | 8447/11074 [1:12:04<21:38, 2.02it/s]
|
1220 |
76%|ββββββββ | 8448/11074 [1:12:04<21:38, 2.02it/s]
|
1221 |
76%|ββββββββ | 8449/11074 [1:12:05<21:36, 2.02it/s]
|
1222 |
76%|ββββββββ | 8450/11074 [1:12:05<21:34, 2.03it/s]
|
1223 |
|
1224 |
+
|
1225 |
76%|ββββββββ | 8450/11074 [1:12:05<21:34, 2.03it/s]
|
1226 |
76%|ββββββββ | 8451/11074 [1:12:06<21:37, 2.02it/s]
|
1227 |
76%|ββββββββ | 8452/11074 [1:12:06<21:34, 2.03it/s]
|
1228 |
76%|ββββββββ | 8453/11074 [1:12:07<21:34, 2.02it/s]
|
1229 |
76%|ββββββββ | 8454/11074 [1:12:07<21:33, 2.03it/s]
|
1230 |
76%|ββββββββ | 8455/11074 [1:12:08<21:33, 2.02it/s]
|
1231 |
76%|ββββββββ | 8456/11074 [1:12:08<21:33, 2.02it/s]
|
1232 |
76%|ββββββββ | 8457/11074 [1:12:09<21:34, 2.02it/s]
|
1233 |
76%|ββββββββ | 8458/11074 [1:12:09<21:33, 2.02it/s]
|
1234 |
76%|ββββββββ | 8459/11074 [1:12:10<21:32, 2.02it/s]
|
1235 |
76%|ββββββββ | 8460/11074 [1:12:10<21:30, 2.03it/s]
|
1236 |
76%|ββββββββ | 8461/11074 [1:12:11<21:31, 2.02it/s]
|
1237 |
76%|ββββββββ | 8462/11074 [1:12:11<21:29, 2.02it/s]
|
1238 |
76%|ββββββββ | 8463/11074 [1:12:12<21:30, 2.02it/s]
|
1239 |
76%|ββββββββ | 8464/11074 [1:12:12<21:29, 2.02it/s]
|
1240 |
76%|ββββββββ | 8465/11074 [1:12:13<21:28, 2.02it/s]
|
1241 |
76%|ββββββββ | 8466/11074 [1:12:13<21:28, 2.02it/s]
|
1242 |
76%|ββββββββ | 8467/11074 [1:12:14<21:27, 2.03it/s]
|
1243 |
76%|ββββββββ | 8468/11074 [1:12:14<21:27, 2.02it/s]
|
1244 |
76%|ββββββββ | 8469/11074 [1:12:15<21:26, 2.03it/s]
|
1245 |
76%|ββββββββ | 8470/11074 [1:12:15<21:26, 2.02it/s]
|
1246 |
76%|ββββββββ | 8471/11074 [1:12:16<21:24, 2.03it/s]
|
1247 |
77%|ββββββββ | 8472/11074 [1:12:16<21:24, 2.03it/s]
|
1248 |
77%|ββββββββ | 8473/11074 [1:12:17<21:24, 2.03it/s]
|
1249 |
77%|ββββββββ | 8474/11074 [1:12:17<21:23, 2.02it/s]
|
1250 |
77%|ββββββββ | 8475/11074 [1:12:18<21:23, 2.03it/s]{'loss': 3.2461, 'grad_norm': 0.23413671553134918, 'learning_rate': 0.00015862828210244434, 'epoch': 10.71}
|
1251 |
|
1252 |
+
|
1253 |
77%|ββββββββ | 8475/11074 [1:12:18<21:23, 2.03it/s]
|
1254 |
77%|ββββββββ | 8476/11074 [1:12:18<21:25, 2.02it/s]
|
1255 |
77%|ββββββββ | 8477/11074 [1:12:19<21:23, 2.02it/s]
|
1256 |
77%|ββββββββ | 8478/11074 [1:12:19<21:23, 2.02it/s]
|
1257 |
77%|ββββββββ | 8479/11074 [1:12:20<21:22, 2.02it/s]
|
1258 |
77%|ββββββββ | 8480/11074 [1:12:20<21:21, 2.02it/s]
|
1259 |
77%|ββββββββ | 8481/11074 [1:12:21<21:21, 2.02it/s]
|
1260 |
77%|ββββββββ | 8482/11074 [1:12:21<21:20, 2.02it/s]
|
1261 |
77%|ββββββββ | 8483/11074 [1:12:22<21:20, 2.02it/s]
|
1262 |
77%|ββββββββ | 8484/11074 [1:12:22<21:19, 2.02it/s]
|
1263 |
77%|ββββββββ | 8485/11074 [1:12:22<21:20, 2.02it/s]
|
1264 |
77%|ββββββββ | 8486/11074 [1:12:23<21:19, 2.02it/s]
|
1265 |
77%|ββββββββ | 8487/11074 [1:12:23<21:18, 2.02it/s]
|
1266 |
77%|ββββββββ | 8488/11074 [1:12:24<21:17, 2.03it/s]
|
1267 |
77%|ββββββββ | 8489/11074 [1:12:24<21:16, 2.02it/s]
|
1268 |
77%|ββββββββ | 8490/11074 [1:12:25<21:15, 2.03it/s]
|
1269 |
77%|ββββββββ | 8491/11074 [1:12:25<21:16, 2.02it/s]
|
1270 |
77%|ββββββββ | 8492/11074 [1:12:26<21:14, 2.03it/s]
|
1271 |
77%|ββββββββ | 8493/11074 [1:12:26<21:14, 2.02it/s]
|
1272 |
77%|ββββββββ | 8494/11074 [1:12:27<21:13, 2.03it/s]
|
1273 |
77%|ββββββββ | 8495/11074 [1:12:27<21:14, 2.02it/s]
|
1274 |
77%|ββββββββ | 8496/11074 [1:12:28<21:13, 2.02it/s]
|
1275 |
77%|ββββββββ | 8497/11074 [1:12:28<21:12, 2.03it/s]
|
1276 |
77%|ββββββββ | 8498/11074 [1:12:29<21:12, 2.02it/s]
|
1277 |
77%|ββββββββ | 8499/11074 [1:12:29<21:12, 2.02it/s]
|
1278 |
77%|ββββββββ | 8500/11074 [1:12:30<21:12, 2.02it/s]
|
1279 |
{'loss': 3.2367, 'grad_norm': 0.23599669337272644, 'learning_rate': 0.0001557598356819, 'epoch': 10.74}
|
1280 |
+
|
1281 |
77%|ββββββββ | 8500/11074 [1:12:30<21:12, 2.02it/s]
|
1282 |
77%|ββββββββ | 8501/11074 [1:12:30<21:14, 2.02it/s]
|
1283 |
77%|ββββββββ | 8502/11074 [1:12:31<21:12, 2.02it/s]
|
1284 |
77%|ββββββββ | 8503/11074 [1:12:31<21:12, 2.02it/s]
|
1285 |
77%|ββββββββ | 8504/11074 [1:12:32<21:10, 2.02it/s]
|
1286 |
77%|ββββββββ | 8505/11074 [1:12:32<21:09, 2.02it/s]
|
1287 |
77%|ββββββββ | 8506/11074 [1:12:33<21:09, 2.02it/s]
|
1288 |
77%|ββββββββ | 8507/11074 [1:12:33<21:07, 2.02it/s]
|
1289 |
77%|ββββββββ | 8508/11074 [1:12:34<21:08, 2.02it/s]
|
1290 |
77%|ββββββββ | 8509/11074 [1:12:34<21:08, 2.02it/s]
|
1291 |
77%|ββββββββ | 8510/11074 [1:12:35<21:07, 2.02it/s]
|
1292 |
77%|ββββββββ | 8511/11074 [1:12:35<21:06, 2.02it/s]
|
1293 |
77%|ββββββββ | 8512/11074 [1:12:36<21:06, 2.02it/s]
|
1294 |
77%|ββββββββ | 8513/11074 [1:12:36<21:03, 2.03it/s]
|
1295 |
77%|ββββββββ | 8514/11074 [1:12:37<21:03, 2.03it/s]
|
1296 |
77%|ββββββββ | 8515/11074 [1:12:37<21:03, 2.03it/s]
|
1297 |
77%|ββββββββ | 8516/11074 [1:12:38<21:03, 2.02it/s]
|
1298 |
77%|ββββββββ | 8517/11074 [1:12:38<21:02, 2.03it/s]
|
1299 |
77%|ββββββββ | 8518/11074 [1:12:39<21:01, 2.03it/s]
|
1300 |
77%|ββββββββ | 8519/11074 [1:12:39<21:01, 2.03it/s]
|
1301 |
77%|ββββββββ | 8520/11074 [1:12:40<20:59, 2.03it/s]
|
1302 |
77%|ββββββββ | 8521/11074 [1:12:40<21:00, 2.03it/s]
|
1303 |
77%|ββββββββ | 8522/11074 [1:12:41<20:58, 2.03it/s]
|
1304 |
77%|ββββββββ | 8523/11074 [1:12:41<20:59, 2.03it/s]
|
1305 |
77%|ββββββββ | 8524/11074 [1:12:42<20:58, 2.03it/s]
|
1306 |
77%|ββββββββ | 8525/11074 [1:12:42<20:58, 2.03it/s]{'loss': 3.2446, 'grad_norm': 0.2339746206998825, 'learning_rate': 0.00015291276874882887, 'epoch': 10.77}
|
1307 |
+
|
1308 |
|
1309 |
77%|ββββββββ | 8525/11074 [1:12:42<20:58, 2.03it/s]
|
1310 |
77%|ββββββββ | 8526/11074 [1:12:43<20:59, 2.02it/s]
|
1311 |
77%|ββββββββ | 8527/11074 [1:12:43<20:59, 2.02it/s]
|
1312 |
77%|ββββββββ | 8528/11074 [1:12:44<20:58, 2.02it/s]
|
1313 |
77%|ββββββββ | 8529/11074 [1:12:44<20:57, 2.02it/s]
|
1314 |
77%|ββββββββ | 8530/11074 [1:12:45<20:57, 2.02it/s]
|
1315 |
77%|ββββββββ | 8531/11074 [1:12:45<20:55, 2.03it/s]
|
1316 |
77%|ββββββββ | 8532/11074 [1:12:46<20:55, 2.02it/s]
|
1317 |
77%|ββββββββ | 8533/11074 [1:12:46<20:54, 2.03it/s]
|
1318 |
77%|ββββββββ | 8534/11074 [1:12:47<20:54, 2.02it/s]
|
1319 |
77%|ββββββββ | 8535/11074 [1:12:47<20:53, 2.03it/s]
|
1320 |
77%|ββββββββ | 8536/11074 [1:12:48<20:53, 2.02it/s]
|
1321 |
77%|ββββββββ | 8537/11074 [1:12:48<20:54, 2.02it/s]
|
1322 |
77%|ββββββββ | 8538/11074 [1:12:49<20:53, 2.02it/s]
|
1323 |
77%|ββββββββ | 8539/11074 [1:12:49<20:54, 2.02it/s]
|
1324 |
77%|ββββββββ | 8540/11074 [1:12:50<20:55, 2.02it/s]
|
1325 |
77%|ββββββββ | 8541/11074 [1:12:50<20:54, 2.02it/s]
|
1326 |
77%|ββββββββ | 8542/11074 [1:12:51<20:52, 2.02it/s]
|
1327 |
77%|ββββββββ | 8543/11074 [1:12:51<20:52, 2.02it/s]
|
1328 |
77%|ββββββββ | 8544/11074 [1:12:52<20:51, 2.02it/s]
|
1329 |
77%|ββββββββ | 8545/11074 [1:12:52<20:51, 2.02it/s]
|
1330 |
77%|ββββββββ | 8546/11074 [1:12:53<20:50, 2.02it/s]
|
1331 |
77%|ββββββββ | 8547/11074 [1:12:53<20:48, 2.02it/s]
|
1332 |
77%|ββββββββ | 8548/11074 [1:12:54<20:48, 2.02it/s]
|
1333 |
77%|ββββββββ | 8549/11074 [1:12:54<20:47, 2.02it/s]
|
1334 |
77%|ββββββββ | 8550/11074 [1:12:55<20:47, 2.02it/s]{'loss': 3.24, 'grad_norm': 0.24058009684085846, 'learning_rate': 0.00015008725812406143, 'epoch': 10.8}
|
1335 |
+
|
1336 |
|
1337 |
77%|ββββββββ | 8550/11074 [1:12:55<20:47, 2.02it/s]
|
1338 |
77%|ββββββββ | 8551/11074 [1:12:55<20:48, 2.02it/s]
|
1339 |
77%|ββββββββ | 8552/11074 [1:12:56<20:48, 2.02it/s]
|
1340 |
77%|ββββββββ | 8553/11074 [1:12:56<20:47, 2.02it/s]
|
1341 |
77%|ββββββββ | 8554/11074 [1:12:57<20:46, 2.02it/s]
|
1342 |
77%|ββββββββ | 8555/11074 [1:12:57<20:46, 2.02it/s]
|
1343 |
77%|ββββββββ | 8556/11074 [1:12:58<20:44, 2.02it/s]
|
1344 |
77%|ββββββββ | 8557/11074 [1:12:58<20:43, 2.02it/s]
|
1345 |
77%|ββββββββ | 8558/11074 [1:12:59<20:42, 2.03it/s]
|
1346 |
77%|ββββββββ | 8559/11074 [1:12:59<20:41, 2.03it/s]
|
1347 |
77%|ββββββββ | 8560/11074 [1:13:00<20:41, 2.03it/s]
|
1348 |
77%|ββββββββ | 8561/11074 [1:13:00<20:41, 2.02it/s]
|
1349 |
77%|ββββββββ | 8562/11074 [1:13:01<20:41, 2.02it/s]
|
1350 |
77%|ββββββββ | 8563/11074 [1:13:01<20:40, 2.02it/s]
|
1351 |
77%|ββββββββ | 8564/11074 [1:13:02<20:40, 2.02it/s]
|
1352 |
77%|ββββββββ | 8565/11074 [1:13:02<20:38, 2.03it/s]
|
1353 |
77%|ββββββββ | 8566/11074 [1:13:03<20:39, 2.02it/s]
|
1354 |
77%|ββββββββ | 8567/11074 [1:13:03<20:36, 2.03it/s]
|
1355 |
77%|ββββββββ | 8568/11074 [1:13:04<20:37, 2.03it/s]
|
1356 |
77%|ββββββββ | 8569/11074 [1:13:04<20:36, 2.03it/s]
|
1357 |
77%|ββββββββ | 8570/11074 [1:13:04<20:36, 2.02it/s]
|
1358 |
77%|ββββββββ | 8571/11074 [1:13:05<20:36, 2.02it/s]
|
1359 |
77%|ββββββββ | 8572/11074 [1:13:05<20:34, 2.03it/s]
|
1360 |
77%|ββββββββ | 8573/11074 [1:13:06<20:36, 2.02it/s]
|
1361 |
77%|ββββββββ | 8574/11074 [1:13:06<20:33, 2.03it/s]
|
1362 |
77%|ββββββββ | 8575/11074 [1:13:07<20:34, 2.02it/s]{'loss': 3.2475, 'grad_norm': 0.23284657299518585, 'learning_rate': 0.00014728347928964547, 'epoch': 10.83}
|
1363 |
+
|
1364 |
|
1365 |
77%|ββββββββ | 8575/11074 [1:13:07<20:34, 2.02it/s]
|
1366 |
77%|ββββββββ | 8576/11074 [1:13:07<20:34, 2.02it/s]
|
1367 |
77%|ββββββββ | 8577/11074 [1:13:08<20:33, 2.02it/s]
|
1368 |
77%|ββββββββ | 8578/11074 [1:13:08<20:33, 2.02it/s]
|
1369 |
77%|ββββββββ | 8579/11074 [1:13:09<20:33, 2.02it/s]
|
1370 |
77%|ββββββββ | 8580/11074 [1:13:09<20:32, 2.02it/s]
|
1371 |
77%|ββββββββ | 8581/11074 [1:13:10<20:31, 2.02it/s]
|
1372 |
77%|ββββββββ | 8582/11074 [1:13:10<20:31, 2.02it/s]
|
1373 |
78%|ββββββββ | 8583/11074 [1:13:11<20:29, 2.03it/s]
|
1374 |
78%|ββββββββ | 8584/11074 [1:13:11<20:29, 2.02it/s]
|
1375 |
78%|ββββββββ | 8585/11074 [1:13:12<20:29, 2.02it/s]
|
1376 |
78%|ββββββββ | 8586/11074 [1:13:12<20:30, 2.02it/s]
|
1377 |
78%|ββββββββ | 8587/11074 [1:13:13<20:28, 2.02it/s]
|
1378 |
78%|ββββββββ | 8588/11074 [1:13:13<20:30, 2.02it/s]
|
1379 |
78%|ββββββββ | 8589/11074 [1:13:14<20:37, 2.01it/s]
|
1380 |
78%|ββββββββ | 8590/11074 [1:13:14<20:33, 2.01it/s]
|
1381 |
78%|ββββββββ | 8591/11074 [1:13:15<20:30, 2.02it/s]
|
1382 |
78%|ββββββββ | 8592/11074 [1:13:15<20:29, 2.02it/s]
|
1383 |
78%|ββββββββ | 8593/11074 [1:13:16<20:28, 2.02it/s]
|
1384 |
78%|ββββββββ | 8594/11074 [1:13:16<20:27, 2.02it/s]
|
1385 |
78%|ββββββββ | 8595/11074 [1:13:17<20:26, 2.02it/s]
|
1386 |
78%|ββββββββ | 8596/11074 [1:13:17<20:26, 2.02it/s]
|
1387 |
78%|ββββββββ | 8597/11074 [1:13:18<20:25, 2.02it/s]
|
1388 |
78%|ββββββββ | 8598/11074 [1:13:18<20:25, 2.02it/s]
|
1389 |
78%|ββββββββ | 8599/11074 [1:13:19<20:24, 2.02it/s]
|
1390 |
78%|ββββββββ | 8600/11074 [1:13:19<20:23, 2.02it/s]{'loss': 3.2479, 'grad_norm': 0.2400185614824295, 'learning_rate': 0.00014450160637794786, 'epoch': 10.87}
|
1391 |
+
|
1392 |
|
1393 |
78%|ββββββββ | 8600/11074 [1:13:19<20:23, 2.02it/s]
|
1394 |
78%|ββββββββ | 8601/11074 [1:13:20<20:24, 2.02it/s]
|
1395 |
78%|ββββββββ | 8602/11074 [1:13:20<20:22, 2.02it/s]
|
1396 |
78%|ββββββββ | 8603/11074 [1:13:21<20:21, 2.02it/s]
|
1397 |
78%|ββββββββ | 8604/11074 [1:13:21<20:20, 2.02it/s]
|
1398 |
78%|ββββββββ | 8605/11074 [1:13:22<20:21, 2.02it/s]
|
1399 |
78%|ββββββββ | 8606/11074 [1:13:22<20:20, 2.02it/s]
|
1400 |
78%|ββββββββ | 8607/11074 [1:13:23<20:19, 2.02it/s]
|
1401 |
78%|ββββββββ | 8608/11074 [1:13:23<20:17, 2.02it/s]
|
1402 |
78%|ββββββββ | 8609/11074 [1:13:24<20:18, 2.02it/s]
|
1403 |
78%|ββββββββ | 8610/11074 [1:13:24<20:17, 2.02it/s]
|
1404 |
78%|ββββββββ | 8611/11074 [1:13:25<20:18, 2.02it/s]
|
1405 |
78%|ββββββββ | 8612/11074 [1:13:25<20:16, 2.02it/s]
|
1406 |
78%|ββββββββ | 8613/11074 [1:13:26<20:16, 2.02it/s]
|
1407 |
78%|ββββββββ | 8614/11074 [1:13:26<20:14, 2.02it/s]
|
1408 |
78%|ββββββββ | 8615/11074 [1:13:27<20:15, 2.02it/s]
|
1409 |
78%|ββββββββ | 8616/11074 [1:13:27<20:14, 2.02it/s]
|
1410 |
78%|ββββββββ | 8617/11074 [1:13:28<20:14, 2.02it/s]
|
1411 |
78%|ββββββββ | 8618/11074 [1:13:28<20:13, 2.02it/s]
|
1412 |
78%|ββββββββ | 8619/11074 [1:13:29<20:11, 2.03it/s]
|
1413 |
78%|ββββββββ | 8620/11074 [1:13:29<20:12, 2.02it/s]
|
1414 |
78%|ββββββββ | 8621/11074 [1:13:30<20:10, 2.03it/s]
|
1415 |
78%|ββββββββ | 8622/11074 [1:13:30<20:11, 2.02it/s]
|
1416 |
78%|ββββββββ | 8623/11074 [1:13:31<20:09, 2.03it/s]
|
1417 |
78%|ββββββββ | 8624/11074 [1:13:31<20:09, 2.03it/s]
|
1418 |
78%|ββββββββ | 8625/11074 [1:13:32<20:08, 2.03it/s]{'loss': 3.2424, 'grad_norm': 0.24160033464431763, 'learning_rate': 0.00014174181216083863, 'epoch': 10.9}
|
1419 |
+
|
1420 |
|
1421 |
78%|ββββββββ | 8625/11074 [1:13:32<20:08, 2.03it/s]
|
1422 |
78%|ββββββββ | 8626/11074 [1:13:32<20:14, 2.02it/s]
|
1423 |
78%|ββββββββ | 8627/11074 [1:13:33<20:11, 2.02it/s]
|
1424 |
78%|ββββββββ | 8628/11074 [1:13:33<20:10, 2.02it/s]
|
1425 |
78%|ββββββββ | 8629/11074 [1:13:34<20:08, 2.02it/s]
|
1426 |
78%|ββββββββ | 8630/11074 [1:13:34<20:09, 2.02it/s]
|
1427 |
78%|ββββββββ | 8631/11074 [1:13:35<20:06, 2.02it/s]
|
1428 |
78%|ββββββββ | 8632/11074 [1:13:35<20:08, 2.02it/s]
|
1429 |
78%|ββββββββ | 8633/11074 [1:13:36<20:06, 2.02it/s]
|
1430 |
78%|ββββββββ | 8634/11074 [1:13:36<20:05, 2.02it/s]
|
1431 |
78%|ββββββββ | 8635/11074 [1:13:37<20:04, 2.02it/s]
|
1432 |
78%|ββββββββ | 8636/11074 [1:13:37<20:04, 2.02it/s]
|
1433 |
78%|ββββββββ | 8637/11074 [1:13:38<20:02, 2.03it/s]
|
1434 |
78%|ββββββββ | 8638/11074 [1:13:38<20:02, 2.03it/s]
|
1435 |
78%|ββββββββ | 8639/11074 [1:13:39<20:03, 2.02it/s]
|
1436 |
78%|ββββββββ | 8640/11074 [1:13:39<20:03, 2.02it/s]
|
1437 |
78%|ββββββββ | 8641/11074 [1:13:40<20:02, 2.02it/s]
|
1438 |
78%|ββββββββ | 8642/11074 [1:13:40<20:03, 2.02it/s]
|
1439 |
78%|ββββββββ | 8643/11074 [1:13:41<20:02, 2.02it/s]
|
1440 |
78%|ββββββββ | 8644/11074 [1:13:41<20:02, 2.02it/s]
|
1441 |
78%|ββββββββ | 8645/11074 [1:13:42<20:00, 2.02it/s]
|
1442 |
78%|ββββββββ | 8646/11074 [1:13:42<20:01, 2.02it/s]
|
1443 |
78%|ββββββββ | 8647/11074 [1:13:43<20:00, 2.02it/s]
|
1444 |
78%|ββββββββ | 8648/11074 [1:13:43<20:00, 2.02it/s]
|
1445 |
78%|ββββββββ | 8649/11074 [1:13:44<19:59, 2.02it/s]
|
1446 |
78%|ββββββββ | 8650/11074 [1:13:44<19:59, 2.02it/s]
|
1447 |
{'loss': 3.2466, 'grad_norm': 0.23945370316505432, 'learning_rate': 0.00013900426803896234, 'epoch': 10.93}
|
1448 |
+
|
1449 |
78%|ββββββββ | 8650/11074 [1:13:44<19:59, 2.02it/s]
|
1450 |
78%|ββββββββ | 8651/11074 [1:13:45<19:59, 2.02it/s]
|
1451 |
78%|ββββββββ | 8652/11074 [1:13:45<19:59, 2.02it/s]
|
1452 |
78%|ββββββββ | 8653/11074 [1:13:46<19:57, 2.02it/s]
|
1453 |
78%|ββββββββ | 8654/11074 [1:13:46<19:56, 2.02it/s]
|
1454 |
78%|ββββββββ | 8655/11074 [1:13:47<19:55, 2.02it/s]
|
1455 |
78%|ββββββββ | 8656/11074 [1:13:47<19:56, 2.02it/s]
|
1456 |
78%|ββββββββ | 8657/11074 [1:13:48<19:55, 2.02it/s]
|
1457 |
78%|ββββββββ | 8658/11074 [1:13:48<19:56, 2.02it/s]
|
1458 |
78%|ββββββββ | 8659/11074 [1:13:49<19:54, 2.02it/s]
|
1459 |
78%|ββββββββ | 8660/11074 [1:13:49<19:55, 2.02it/s]
|
1460 |
78%|ββββββββ | 8661/11074 [1:13:49<19:53, 2.02it/s]
|
1461 |
78%|ββββββββ | 8662/11074 [1:13:50<19:52, 2.02it/s]
|
1462 |
78%|ββββββββ | 8663/11074 [1:13:50<19:52, 2.02it/s]
|
1463 |
78%|ββββββββ | 8664/11074 [1:13:51<19:50, 2.02it/s]
|
1464 |
78%|ββββββββ | 8665/11074 [1:13:51<19:51, 2.02it/s]
|
1465 |
78%|ββββββββ | 8666/11074 [1:13:52<19:51, 2.02it/s]
|
1466 |
78%|ββββββββ | 8667/11074 [1:13:52<19:50, 2.02it/s]
|
1467 |
78%|ββββββββ | 8668/11074 [1:13:53<19:48, 2.02it/s]
|
1468 |
78%|ββββββββ | 8669/11074 [1:13:53<19:49, 2.02it/s]
|
1469 |
78%|ββββββββ | 8670/11074 [1:13:54<19:48, 2.02it/s]
|
1470 |
78%|ββββββββ | 8671/11074 [1:13:54<19:48, 2.02it/s]
|
1471 |
78%|ββββββββ | 8672/11074 [1:13:55<19:48, 2.02it/s]
|
1472 |
78%|ββββββββ | 8673/11074 [1:13:55<19:47, 2.02it/s]
|
1473 |
78%|ββββββββ | 8674/11074 [1:13:56<19:47, 2.02it/s]
|
1474 |
78%|ββββββββ | 8675/11074 [1:13:56<19:46, 2.02it/s]
|
1475 |
|
1476 |
+
|
1477 |
78%|ββββββββ | 8675/11074 [1:13:56<19:46, 2.02it/s]
|
1478 |
78%|ββββββββ | 8676/11074 [1:13:57<19:46, 2.02it/s]
|
1479 |
78%|ββββββββ | 8677/11074 [1:13:57<19:45, 2.02it/s]
|
1480 |
78%|ββββββββ | 8678/11074 [1:13:58<19:45, 2.02it/s]
|
1481 |
78%|ββββββββ | 8679/11074 [1:13:58<19:43, 2.02it/s]
|
1482 |
78%|ββββββββ | 8680/11074 [1:13:59<19:44, 2.02it/s]
|
1483 |
78%|ββββββββ | 8681/11074 [1:13:59<19:43, 2.02it/s]
|
1484 |
78%|ββββββββ | 8682/11074 [1:14:00<19:42, 2.02it/s]
|
1485 |
78%|ββββββββ | 8683/11074 [1:14:00<19:42, 2.02it/s]
|
1486 |
78%|ββββββββ | 8684/11074 [1:14:01<19:41, 2.02it/s]
|
1487 |
78%|ββββββββ | 8685/11074 [1:14:01<19:41, 2.02it/s]
|
1488 |
78%|ββββββββ | 8686/11074 [1:14:02<19:42, 2.02it/s]
|
1489 |
78%|ββββββββ | 8687/11074 [1:14:02<19:41, 2.02it/s]
|
1490 |
78%|ββββββββ | 8688/11074 [1:14:03<19:40, 2.02it/s]
|
1491 |
78%|ββββββββ | 8689/11074 [1:14:03<19:39, 2.02it/s]
|
1492 |
78%|ββββββββ | 8690/11074 [1:14:04<19:40, 2.02it/s]
|
1493 |
78%|ββββββββ | 8691/11074 [1:14:04<19:40, 2.02it/s]
|
1494 |
78%|ββββββββ | 8692/11074 [1:14:05<19:38, 2.02it/s]
|
1495 |
78%|ββββββββ | 8693/11074 [1:14:05<19:37, 2.02it/s]
|
1496 |
79%|ββββββββ | 8694/11074 [1:14:06<19:36, 2.02it/s]
|
1497 |
79%|ββββββββ | 8695/11074 [1:14:06<19:36, 2.02it/s]
|
1498 |
79%|ββββββββ | 8696/11074 [1:14:07<19:35, 2.02it/s]
|
1499 |
79%|ββββββββ | 8697/11074 [1:14:07<19:35, 2.02it/s]
|
1500 |
79%|ββββββββ | 8698/11074 [1:14:08<19:35, 2.02it/s]
|
1501 |
79%|ββββββββ | 8699/11074 [1:14:08<19:34, 2.02it/s]
|
1502 |
79%|ββββββββ | 8700/11074 [1:14:09<19:33, 2.02it/s]{'loss': 3.2503, 'grad_norm': 0.24186570942401886, 'learning_rate': 0.000133596608763568, 'epoch': 10.99}
|
1503 |
+
|
1504 |
|
1505 |
79%|ββββββββ | 8700/11074 [1:14:09<19:33, 2.02it/s]
|
1506 |
79%|ββββββββ | 8701/11074 [1:14:09<19:34, 2.02it/s]
|
1507 |
79%|ββββββββ | 8702/11074 [1:14:10<19:34, 2.02it/s]
|
1508 |
79%|ββββββββ | 8703/11074 [1:14:10<19:33, 2.02it/s]
|
1509 |
79%|ββββββββ | 8704/11074 [1:14:11<19:33, 2.02it/s]
|
1510 |
79%|ββββββββ | 8705/11074 [1:14:11<19:31, 2.02it/s]
|
1511 |
79%|ββββββββ | 8706/11074 [1:14:12<20:00, 1.97it/s]
|
1512 |
79%|ββββββββ | 8707/11074 [1:14:24<2:38:17, 4.01s/it]
|
1513 |
79%|ββββββββ | 8708/11074 [1:14:24<1:56:41, 2.96s/it]
|
1514 |
79%|ββββββββ | 8709/11074 [1:14:25<1:27:30, 2.22s/it]
|
1515 |
79%|ββββββββ | 8710/11074 [1:14:25<1:07:12, 1.71s/it]
|
1516 |
79%|ββββββββ | 8711/11074 [1:14:26<52:50, 1.34s/it]
|
1517 |
79%|ββββββββ | 8712/11074 [1:14:26<42:50, 1.09s/it]
|
1518 |
79%|ββββββββ | 8713/11074 [1:14:27<35:46, 1.10it/s]
|
1519 |
79%|ββββββββ | 8714/11074 [1:14:27<30:52, 1.27it/s]
|
1520 |
79%|ββββββββ | 8715/11074 [1:14:28<27:25, 1.43it/s]
|
1521 |
79%|ββββββββ | 8716/11074 [1:14:28<25:00, 1.57it/s]
|
1522 |
79%|ββββββββ | 8717/11074 [1:14:29<23:20, 1.68it/s]
|
1523 |
79%|ββββββββ | 8718/11074 [1:14:29<22:07, 1.77it/s]
|
1524 |
79%|ββββββββ | 8719/11074 [1:14:30<21:26, 1.83it/s]
|
1525 |
79%|ββββββββ | 8720/11074 [1:14:30<20:52, 1.88it/s]
|
1526 |
79%|ββββββββ | 8721/11074 [1:14:31<20:27, 1.92it/s]
|
1527 |
79%|ββββββββ | 8722/11074 [1:14:31<20:06, 1.95it/s]
|
1528 |
79%|ββββββββ | 8723/11074 [1:14:32<19:52, 1.97it/s]
|
1529 |
79%|ββββββββ | 8724/11074 [1:14:32<19:42, 1.99it/s]
|
1530 |
79%|ββββββββ | 8725/11074 [1:14:33<19:37, 2.00it/s]
|
1531 |
{'loss': 3.1825, 'grad_norm': 0.2393781691789627, 'learning_rate': 0.0001309268294598309, 'epoch': 11.02}
|
1532 |
+
|
1533 |
79%|ββββββββ | 8725/11074 [1:14:33<19:37, 2.00it/s]
|
1534 |
79%|ββββββββ | 8726/11074 [1:14:33<19:32, 2.00it/s]
|
1535 |
79%|ββββββββ | 8727/11074 [1:14:34<19:28, 2.01it/s]
|
1536 |
79%|ββββββββ | 8728/11074 [1:14:34<19:27, 2.01it/s]
|
1537 |
79%|ββββββββ | 8729/11074 [1:14:35<19:24, 2.01it/s]
|
1538 |
79%|ββββββββ | 8730/11074 [1:14:35<19:22, 2.02it/s]
|
1539 |
79%|ββββββββ | 8731/11074 [1:14:36<19:23, 2.01it/s]
|
1540 |
79%|ββββββββ | 8732/11074 [1:14:36<19:22, 2.01it/s]
|
1541 |
79%|ββββββββ | 8733/11074 [1:14:37<19:21, 2.01it/s]
|
1542 |
79%|ββββββββ | 8734/11074 [1:14:37<19:20, 2.02it/s]
|
1543 |
79%|ββββββββ | 8735/11074 [1:14:38<19:17, 2.02it/s]
|
1544 |
79%|ββββββββ | 8736/11074 [1:14:38<19:15, 2.02it/s]
|
1545 |
79%|ββββββββ | 8737/11074 [1:14:39<19:15, 2.02it/s]
|