Training in progress, epoch 14
Browse files- logs/events.out.tfevents.1715596086.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +32 -1
logs/events.out.tfevents.1715596086.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96af6e2a9ada45d0e4d122751106ce3acf6f8c8c062f1ad9a8930098eff5f28f
|
3 |
+
size 95277
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f416e698c8b1f33c7778f00de4881492b2b40f0637a9d167917e1a5a2d5fe0e
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -502,4 +502,35 @@ command outputs:
|
|
502 |
|
503 |
|
504 |
92%|ββββββββββ| 9875/10682 [1:33:52<06:46, 1.99it/s]
|
505 |
92%|ββββββββββ| 9876/10682 [1:33:52<06:45, 1.99it/s]
|
506 |
92%|ββββββββββ| 9877/10682 [1:33:53<06:43, 1.99it/s]
|
507 |
92%|ββββββββββ| 9878/10682 [1:33:53<06:43, 1.99it/s]
|
508 |
92%|ββββββββββ| 9879/10682 [1:33:54<06:43, 1.99it/s]
|
509 |
92%|ββββββββββ| 9880/10682 [1:33:54<06:43, 1.99it/s]
|
510 |
93%|ββββββββββ| 9881/10682 [1:33:55<06:43, 1.99it/s]
|
511 |
93%|ββββββββββ| 9882/10682 [1:33:55<06:42, 1.99it/s]
|
512 |
93%|ββββββββββ| 9883/10682 [1:33:56<06:41, 1.99it/s]
|
513 |
93%|ββββββββββ| 9884/10682 [1:33:56<06:40, 1.99it/s]
|
514 |
93%|ββββββββββ| 9885/10682 [1:33:57<06:40, 1.99it/s]
|
515 |
93%|ββββββββββ| 9886/10682 [1:33:57<06:39, 1.99it/s]
|
516 |
93%|ββββββββββ| 9887/10682 [1:33:58<06:39, 1.99it/s]
|
517 |
93%|ββββββββββ| 9888/10682 [1:33:58<06:39, 1.99it/s]
|
518 |
93%|ββββββββββ| 9889/10682 [1:33:59<06:38, 1.99it/s]
|
519 |
93%|ββββββββββ| 9890/10682 [1:33:59<06:38, 1.99it/s]
|
520 |
93%|ββββββββββ| 9891/10682 [1:34:00<06:37, 1.99it/s]
|
521 |
93%|ββββββββββ| 9892/10682 [1:34:00<06:36, 1.99it/s]
|
522 |
93%|ββββββββββ| 9893/10682 [1:34:01<06:36, 1.99it/s]
|
523 |
93%|ββββββββββ| 9894/10682 [1:34:01<06:36, 1.99it/s]
|
524 |
93%|ββββββββββ| 9895/10682 [1:34:02<06:35, 1.99it/s]
|
525 |
93%|ββββββββββ| 9896/10682 [1:34:02<06:34, 1.99it/s]
|
526 |
93%|ββββββββββ| 9897/10682 [1:34:03<06:34, 1.99it/s]
|
527 |
93%|ββββββββββ| 9898/10682 [1:34:03<06:34, 1.99it/s]
|
528 |
93%|ββββββββββ| 9899/10682 [1:34:04<06:33, 1.99it/s]
|
529 |
93%|ββββββββββ| 9900/10682 [1:34:04<06:32, 1.99it/s]{'loss': 3.1411, 'grad_norm': 0.2500966787338257, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.98}
|
530 |
|
531 |
|
532 |
93%|ββββββββββ| 9900/10682 [1:34:04<06:32, 1.99it/s]
|
533 |
93%|ββββββββββ| 9901/10682 [1:34:05<06:32, 1.99it/s]
|
534 |
93%|ββββββββββ| 9902/10682 [1:34:05<06:31, 1.99it/s]
|
535 |
93%|ββββββββββ| 9903/10682 [1:34:06<06:31, 1.99it/s]
|
536 |
93%|ββββββββββ| 9904/10682 [1:34:06<06:31, 1.99it/s]
|
537 |
93%|ββββββββββ| 9905/10682 [1:34:07<06:29, 1.99it/s]
|
538 |
93%|ββββββββββ| 9906/10682 [1:34:07<06:28, 2.00it/s]
|
539 |
93%|ββββββββββ| 9907/10682 [1:34:08<06:28, 1.99it/s]
|
540 |
93%|ββββββββββ| 9908/10682 [1:34:08<06:28, 1.99it/s]
|
541 |
93%|ββββββββββ| 9909/10682 [1:34:09<06:27, 1.99it/s]
|
542 |
93%|ββββββββββ| 9910/10682 [1:34:09<06:27, 1.99it/s]
|
543 |
93%|ββββββββββ| 9911/10682 [1:34:10<06:26, 2.00it/s]
|
544 |
93%|ββββββββββ| 9912/10682 [1:34:10<06:25, 2.00it/s]
|
545 |
93%|ββββββββββ| 9913/10682 [1:34:11<06:25, 1.99it/s]
|
546 |
93%|ββββββββββ| 9914/10682 [1:34:11<06:25, 1.99it/s]
|
547 |
93%|ββββββββββ| 9915/10682 [1:34:12<06:24, 2.00it/s]
|
548 |
93%|ββββββββββ| 9916/10682 [1:34:12<06:23, 2.00it/s]
|
549 |
93%|ββββββββββ| 9917/10682 [1:34:13<06:22, 2.00it/s]
|
550 |
93%|ββββββββββ| 9918/10682 [1:34:13<06:22, 2.00it/s]
|
551 |
93%|ββββββββββ| 9919/10682 [1:34:14<06:17, 2.02it/s]
|
552 |
93%|ββββββββββ| 9920/10682 [1:35:44<5:47:38, 27.37s/it]
|
553 |
93%|ββββββββββ| 9921/10682 [1:35:44<4:04:56, 19.31s/it]
|
554 |
93%|ββββββββββ| 9922/10682 [1:35:45<2:53:07, 13.67s/it]
|
555 |
93%|ββββββββββ| 9923/10682 [1:35:45<2:02:58, 9.72s/it]
|
556 |
93%|ββββββββββ| 9924/10682 [1:35:46<1:27:51, 6.96s/it]
|
557 |
93%|ββββββββββ| 9925/10682 [1:35:46<1:03:19, 5.02s/it]{'loss': 3.1493, 'grad_norm': 0.24704374372959137, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.01}
|
558 |
|
559 |
|
560 |
93%|ββββββββββ| 9925/10682 [1:35:46<1:03:19, 5.02s/it]
|
561 |
93%|ββββββββββ| 9926/10682 [1:35:47<46:09, 3.66s/it]
|
562 |
93%|ββββββββββ| 9927/10682 [1:35:47<34:09, 2.71s/it]
|
563 |
93%|ββββββββββ| 9928/10682 [1:35:48<25:46, 2.05s/it]
|
564 |
93%|ββββββββββ| 9929/10682 [1:35:48<19:53, 1.59s/it]
|
565 |
93%|ββββββββββ| 9930/10682 [1:35:49<15:47, 1.26s/it]
|
566 |
93%|ββββββββββ| 9931/10682 [1:35:49<12:55, 1.03s/it]
|
567 |
93%|ββββββββββ| 9932/10682 [1:35:50<10:54, 1.15it/s]
|
568 |
93%|ββββββββββ| 9933/10682 [1:35:50<09:30, 1.31it/s]
|
569 |
93%|ββββββββββ| 9934/10682 [1:35:51<08:30, 1.47it/s]
|
570 |
93%|βββββββοΏ½οΏ½οΏ½ββ| 9935/10682 [1:35:51<07:49, 1.59it/s]
|
571 |
93%|ββββββββββ| 9936/10682 [1:35:52<07:19, 1.70it/s]
|
572 |
93%|ββββββββββ| 9937/10682 [1:35:52<06:58, 1.78it/s]
|
573 |
93%|ββββββββββ| 9938/10682 [1:35:53<06:44, 1.84it/s]
|
574 |
93%|ββββββββββ| 9939/10682 [1:35:53<06:34, 1.88it/s]
|
575 |
93%|ββββββββββ| 9940/10682 [1:35:54<06:27, 1.92it/s]
|
576 |
93%|ββββββββββ| 9941/10682 [1:35:54<06:21, 1.94it/s]
|
577 |
93%|ββββββββββ| 9942/10682 [1:35:55<06:17, 1.96it/s]
|
578 |
93%|ββββββββββ| 9943/10682 [1:35:55<06:15, 1.97it/s]
|
579 |
93%|ββββββββββ| 9944/10682 [1:35:56<06:13, 1.98it/s]
|
580 |
93%|ββββββββββ| 9945/10682 [1:35:56<06:11, 1.98it/s]
|
581 |
93%|ββββββββββ| 9946/10682 [1:35:57<06:09, 1.99it/s]
|
582 |
93%|ββββββββββ| 9947/10682 [1:35:57<06:09, 1.99it/s]
|
583 |
93%|ββββββββββ| 9948/10682 [1:35:58<06:08, 1.99it/s]
|
584 |
93%|ββββββββββ| 9949/10682 [1:35:58<06:07, 2.00it/s]
|
585 |
93%|ββββββββββ| 9950/10682 [1:35:59<06:06, 2.00it/s]{'loss': 3.1011, 'grad_norm': 0.24837680160999298, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.04}
|
586 |
|
587 |
-
|
588 |
93%|ββββββββββ| 9950/10682 [1:35:59<06:06, 2.00it/s]
|
589 |
93%|ββββββββββ| 9951/10682 [1:35:59<06:06, 2.00it/s]
|
590 |
93%|ββββββββββ| 9952/10682 [1:36:00<06:05, 2.00it/s]
|
591 |
93%|ββββββββββ| 9953/10682 [1:36:00<06:05, 2.00it/s]
|
592 |
93%|ββββββββββ| 9954/10682 [1:36:01<06:04, 2.00it/s]
|
593 |
93%|ββββββββββ| 9955/10682 [1:36:01<06:03, 2.00it/s]
|
594 |
93%|ββββββββββ| 9956/10682 [1:36:02<06:03, 2.00it/s]
|
|
|
595 |
93%|ββββββββββ| 9950/10682 [1:35:59<06:06, 2.00it/s]
|
596 |
93%|ββββββββββ| 9951/10682 [1:35:59<06:06, 2.00it/s]
|
597 |
93%|ββββββββββ| 9952/10682 [1:36:00<06:05, 2.00it/s]
|
598 |
93%|ββββββββββ| 9953/10682 [1:36:00<06:05, 2.00it/s]
|
599 |
93%|ββββββββββ| 9954/10682 [1:36:01<06:04, 2.00it/s]
|
600 |
93%|ββββββββββ| 9955/10682 [1:36:01<06:03, 2.00it/s]
|
601 |
93%|ββββββββββ| 9956/10682 [1:36:02<06:03, 2.00it/s]
|
602 |
93%|ββββββββββ| 9957/10682 [1:36:02<06:03, 1.99it/s]
|
603 |
93%|ββββββββββ| 9958/10682 [1:36:03<06:02, 2.00it/s]
|
604 |
93%|ββββββββββ| 9959/10682 [1:36:03<06:01, 2.00it/s]
|
605 |
93%|ββββββββββ| 9960/10682 [1:36:04<06:01, 2.00it/s]
|
606 |
93%|ββββββββββ| 9961/10682 [1:36:04<06:01, 2.00it/s]
|
607 |
93%|ββββββββββ| 9962/10682 [1:36:05<06:00, 2.00it/s]
|
608 |
93%|ββββββββββ| 9963/10682 [1:36:05<05:59, 2.00it/s]
|
609 |
93%|ββββββββββ| 9964/10682 [1:36:06<05:59, 2.00it/s]
|
610 |
93%|ββββββββββ| 9965/10682 [1:36:06<05:58, 2.00it/s]
|
611 |
93%|ββββββββββ| 9966/10682 [1:36:07<05:58, 2.00it/s]
|
612 |
93%|ββββββββββ| 9967/10682 [1:36:07<05:57, 2.00it/s]
|
613 |
93%|ββββββββββ| 9968/10682 [1:36:08<05:57, 2.00it/s]
|
614 |
93%|ββββββββββ| 9969/10682 [1:36:08<05:56, 2.00it/s]
|
615 |
93%|ββββββββββ| 9970/10682 [1:36:09<05:56, 2.00it/s]
|
616 |
93%|ββββββββββ| 9971/10682 [1:36:09<05:56, 2.00it/s]
|
617 |
93%|ββββββββββ| 9972/10682 [1:36:10<05:55, 2.00it/s]
|
618 |
93%|ββββββββββ| 9973/10682 [1:36:10<05:54, 2.00it/s]
|
619 |
93%|ββββββββββ| 9974/10682 [1:36:11<05:54, 2.00it/s]
|
620 |
93%|ββββββββββ| 9975/10682 [1:36:11<05:53, 2.00it/s]
|
621 |
{'loss': 3.1115, 'grad_norm': 0.24749550223350525, 'learning_rate': 1.3287025325307511e-05, 'epoch': 13.07}
|
|
|
622 |
93%|ββββββββββ| 9975/10682 [1:36:11<05:53, 2.00it/s]
|
623 |
93%|ββββββββββ| 9976/10682 [1:36:12<05:53, 2.00it/s]
|
624 |
93%|ββββββββββ| 9977/10682 [1:36:12<05:52, 2.00it/s]
|
625 |
93%|ββββββββββ| 9978/10682 [1:36:13<05:52, 2.00it/s]
|
626 |
93%|ββββββββββ| 9979/10682 [1:36:13<05:51, 2.00it/s]
|
627 |
93%|ββββββββββ| 9980/10682 [1:36:14<05:51, 2.00it/s]
|
628 |
93%|ββββββββββ| 9981/10682 [1:36:14<05:51, 2.00it/s]
|
629 |
93%|ββββββββββ| 9982/10682 [1:36:15<05:50, 2.00it/s]
|
630 |
93%|ββββββββββ| 9983/10682 [1:36:15<05:50, 2.00it/s]
|
631 |
93%|ββββββββββ| 9984/10682 [1:36:16<05:49, 2.00it/s]
|
632 |
93%|ββββββββββ| 9985/10682 [1:36:16<05:48, 2.00it/s]
|
633 |
93%|ββββββββββ| 9986/10682 [1:36:17<05:48, 2.00it/s]
|
634 |
93%|ββββββββββ| 9987/10682 [1:36:17<05:48, 2.00it/s]
|
635 |
94%|ββββββββββ| 9988/10682 [1:36:18<05:48, 1.99it/s]
|
636 |
94%|ββββββββββ| 9989/10682 [1:36:18<05:47, 1.99it/s]
|
637 |
94%|ββββββββββ| 9990/10682 [1:36:19<05:46, 1.99it/s]
|
638 |
94%|ββββββββββ| 9991/10682 [1:36:19<05:45, 2.00it/s]
|
639 |
94%|ββββββββββ| 9992/10682 [1:36:20<05:45, 2.00it/s]
|
640 |
94%|ββββββββββ| 9993/10682 [1:36:20<05:45, 2.00it/s]
|
641 |
94%|ββββββββββ| 9994/10682 [1:36:21<05:45, 1.99it/s]
|
642 |
94%|ββββββββββ| 9995/10682 [1:36:21<05:45, 1.99it/s]
|
643 |
94%|ββββββββββ| 9996/10682 [1:36:22<05:44, 1.99it/s]
|
644 |
94%|ββββββββββ| 9997/10682 [1:36:22<05:43, 2.00it/s]
|
645 |
94%|ββββββββββ| 9998/10682 [1:36:23<05:42, 2.00it/s]
|
646 |
94%|ββββββββββ| 9999/10682 [1:36:23<05:42, 1.99it/s]
|
647 |
94%|ββββββββββ| 10000/10682 [1:36:24<05:42, 1.99it/s]
|
648 |
|
|
|
649 |
94%|ββββββββββ| 10000/10682 [1:36:24<05:42, 1.99it/s]
|
650 |
94%|ββββββββββ| 10001/10682 [1:36:24<05:41, 1.99it/s]
|
651 |
94%|ββββββββββ| 10002/10682 [1:36:25<05:40, 2.00it/s]
|
652 |
94%|ββββββββββ| 10003/10682 [1:36:25<05:39, 2.00it/s]
|
653 |
94%|ββββββββββ| 10004/10682 [1:36:26<05:40, 1.99it/s]
|
654 |
94%|ββββββββββ| 10005/10682 [1:36:26<05:39, 1.99it/s]
|
655 |
94%|ββββββββββ| 10006/10682 [1:36:27<05:39, 1.99it/s]
|
656 |
94%|ββββββββββ| 10007/10682 [1:36:27<05:38, 2.00it/s]
|
657 |
94%|ββββββββββ| 10008/10682 [1:36:28<05:37, 2.00it/s]
|
658 |
94%|ββββββββββ| 10009/10682 [1:36:28<05:36, 2.00it/s]
|
659 |
94%|ββββββββββ| 10010/10682 [1:36:29<05:36, 1.99it/s]
|
660 |
94%|ββββββββββ| 10011/10682 [1:36:29<05:36, 2.00it/s]
|
661 |
94%|ββββββββββ| 10012/10682 [1:36:30<05:35, 2.00it/s]
|
662 |
94%|ββββββββββ| 10013/10682 [1:36:30<05:34, 2.00it/s]
|
663 |
94%|ββββββββββ| 10014/10682 [1:36:31<05:34, 2.00it/s]
|
664 |
94%|ββββββββββ| 10015/10682 [1:36:31<05:34, 1.99it/s]
|
665 |
94%|ββββββββββ| 10016/10682 [1:36:32<05:33, 1.99it/s]
|
666 |
94%|ββββββββββ| 10017/10682 [1:36:32<05:32, 2.00it/s]
|
667 |
94%|ββββββββββ| 10018/10682 [1:36:33<05:32, 2.00it/s]
|
668 |
94%|ββββββββββ| 10019/10682 [1:36:33<05:32, 2.00it/s]
|
669 |
94%|ββββββββββ| 10020/10682 [1:36:34<05:31, 1.99it/s]
|
670 |
94%|ββββββββββ| 10021/10682 [1:36:34<05:31, 1.99it/s]
|
671 |
94%|ββββββββββ| 10022/10682 [1:36:35<05:31, 1.99it/s]
|
672 |
94%|ββββββββββ| 10023/10682 [1:36:35<05:30, 1.99it/s]
|
673 |
94%|ββββββββββ| 10024/10682 [1:36:36<05:29, 2.00it/s]
|
674 |
94%|ββββββββββ| 10025/10682 [1:36:36<05:29, 2.00it/s]
|
675 |
{'loss': 3.1075, 'grad_norm': 0.24997055530548096, 'learning_rate': 1.1481100210606388e-05, 'epoch': 13.14}
|
|
|
676 |
94%|ββββββββββ| 10025/10682 [1:36:36<05:29, 2.00it/s]
|
677 |
94%|ββββββββββ| 10026/10682 [1:36:37<05:29, 1.99it/s]
|
678 |
94%|ββββββββββ| 10027/10682 [1:36:37<05:28, 1.99it/s]
|
679 |
94%|ββββββββββ| 10028/10682 [1:36:38<05:27, 1.99it/s]
|
680 |
94%|ββββββββββ| 10029/10682 [1:36:38<05:26, 2.00it/s]
|
681 |
94%|ββββββββββ| 10030/10682 [1:36:39<05:26, 2.00it/s]
|
682 |
94%|ββββββββββ| 10031/10682 [1:36:39<05:26, 2.00it/s]
|
683 |
94%|ββββββββββ| 10032/10682 [1:36:40<05:25, 1.99it/s]
|
684 |
94%|ββββββββββ| 10033/10682 [1:36:40<05:25, 2.00it/s]
|
685 |
94%|ββββββββββ| 10034/10682 [1:36:41<05:24, 1.99it/s]
|
686 |
94%|ββββββββββ| 10035/10682 [1:36:41<05:24, 1.99it/s]
|
687 |
94%|ββββββββββ| 10036/10682 [1:36:42<05:23, 1.99it/s]
|
688 |
94%|ββββββββββ| 10037/10682 [1:36:42<05:23, 2.00it/s]
|
689 |
94%|ββββββββββ| 10038/10682 [1:36:43<05:22, 2.00it/s]
|
690 |
94%|ββββββββββ| 10039/10682 [1:36:43<05:22, 2.00it/s]
|
691 |
94%|ββββββββββ| 10040/10682 [1:36:44<05:21, 2.00it/s]
|
692 |
94%|ββββββββββ| 10041/10682 [1:36:44<05:20, 2.00it/s]
|
693 |
94%|ββββββββββ| 10042/10682 [1:36:45<05:20, 2.00it/s]
|
694 |
94%|ββββββββββ| 10043/10682 [1:36:45<05:19, 2.00it/s]
|
695 |
94%|ββββββββββ| 10044/10682 [1:36:46<05:19, 2.00it/s]
|
696 |
94%|ββββββββββ| 10045/10682 [1:36:46<05:18, 2.00it/s]
|
697 |
94%|ββββββββββ| 10046/10682 [1:36:47<05:18, 2.00it/s]
|
698 |
94%|ββββββββββ| 10047/10682 [1:36:47<05:17, 2.00it/s]
|
699 |
94%|ββββββββββ| 10048/10682 [1:36:48<05:17, 2.00it/s]
|
700 |
94%|ββββββββββ| 10049/10682 [1:36:48<05:16, 2.00it/s]
|
701 |
94%|ββββββββββ| 10050/10682 [1:36:49<05:16, 2.00it/s]
|
702 |
|
|
|
703 |
94%|ββββββββββ| 10050/10682 [1:36:49<05:16, 2.00it/s]
|
704 |
94%|ββββββββββ| 10051/10682 [1:36:49<05:16, 2.00it/s]
|
705 |
94%|ββββββββββ| 10052/10682 [1:36:50<05:15, 2.00it/s]
|
706 |
94%|ββββββββββ| 10053/10682 [1:36:50<05:15, 2.00it/s]
|
707 |
94%|ββββββββββ| 10054/10682 [1:36:51<05:14, 1.99it/s]
|
708 |
94%|ββββββββββ| 10055/10682 [1:36:51<05:14, 1.99it/s]
|
709 |
94%|ββββββββββ| 10056/10682 [1:36:52<05:13, 2.00it/s]
|
710 |
94%|ββββββββββ| 10057/10682 [1:36:52<05:12, 2.00it/s]
|
711 |
94%|ββββββββββ| 10058/10682 [1:36:53<05:13, 1.99it/s]
|
712 |
94%|ββββββββββ| 10059/10682 [1:36:53<05:12, 1.99it/s]
|
713 |
94%|ββββββββββ| 10060/10682 [1:36:54<05:11, 1.99it/s]
|
714 |
94%|ββββββββββ| 10061/10682 [1:36:54<05:10, 2.00it/s]
|
715 |
94%|ββββββββββ| 10062/10682 [1:36:55<05:10, 2.00it/s]
|
716 |
94%|ββββββββββ| 10063/10682 [1:36:55<05:10, 2.00it/s]
|
717 |
94%|ββββββββββ| 10064/10682 [1:36:56<05:10, 1.99it/s]
|
718 |
94%|ββββββββββ| 10065/10682 [1:36:56<05:09, 1.99it/s]
|
719 |
94%|ββββββββββ| 10066/10682 [1:36:57<05:09, 1.99it/s]
|
720 |
94%|ββββββββββ| 10067/10682 [1:36:57<05:08, 1.99it/s]
|
721 |
94%|ββββββββββ| 10068/10682 [1:36:58<05:07, 1.99it/s]
|
722 |
94%|ββββββββββ| 10069/10682 [1:36:58<05:07, 1.99it/s]
|
723 |
94%|ββββββββββ| 10070/10682 [1:36:59<05:06, 1.99it/s]
|
724 |
94%|ββββββββββ| 10071/10682 [1:36:59<05:06, 1.99it/s]
|
725 |
94%|ββββββββββ| 10072/10682 [1:37:00<05:05, 1.99it/s]
|
726 |
94%|ββββββββββ| 10073/10682 [1:37:00<05:05, 2.00it/s]
|
727 |
94%|ββββββββββ| 10074/10682 [1:37:01<05:04, 2.00it/s]
|
728 |
94%|ββββββββββ| 10075/10682 [1:37:01<05:04, 1.99it/s]
|
729 |
|
|
|
730 |
94%|ββββββββββ| 10075/10682 [1:37:01<05:04, 1.99it/s]
|
731 |
94%|ββββββββββ| 10076/10682 [1:37:02<05:04, 1.99it/s]
|
732 |
94%|ββββββββββ| 10077/10682 [1:37:02<05:03, 1.99it/s]
|
733 |
94%|ββββββββββ| 10078/10682 [1:37:03<05:02, 1.99it/s]
|
734 |
94%|ββββββββββ| 10079/10682 [1:37:03<05:02, 2.00it/s]
|
735 |
94%|ββββββββββ| 10080/10682 [1:37:04<05:01, 1.99it/s]
|
736 |
94%|ββββββββββ| 10081/10682 [1:37:04<05:01, 1.99it/s]
|
737 |
94%|ββββββββββ| 10082/10682 [1:37:05<05:01, 1.99it/s]
|
738 |
94%|ββββββββββ| 10083/10682 [1:37:05<05:00, 1.99it/s]
|
739 |
94%|ββββββββββ| 10084/10682 [1:37:06<04:59, 1.99it/s]
|
740 |
94%|ββββββββββ| 10085/10682 [1:37:06<04:59, 2.00it/s]
|
741 |
94%|ββββββββββ| 10086/10682 [1:37:07<04:58, 1.99it/s]
|
742 |
94%|ββββββββββ| 10087/10682 [1:37:07<04:58, 1.99it/s]
|
743 |
94%|ββββββββββ| 10088/10682 [1:37:08<04:57, 1.99it/s]
|
744 |
94%|ββββββββββ| 10089/10682 [1:37:08<04:57, 2.00it/s]
|
745 |
94%|ββββββββββ| 10090/10682 [1:37:09<04:56, 2.00it/s]
|
746 |
94%|ββββββββββ| 10091/10682 [1:37:09<04:56, 2.00it/s]
|
747 |
94%|ββββββββββ| 10092/10682 [1:37:10<04:55, 2.00it/s]
|
748 |
94%|ββββββββββ| 10093/10682 [1:37:10<04:54, 2.00it/s]
|
749 |
94%|ββββββββββ| 10094/10682 [1:37:11<04:54, 2.00it/s]
|
750 |
95%|ββββββββββ| 10095/10682 [1:37:11<04:54, 1.99it/s]
|
751 |
95%|ββββββββββ| 10096/10682 [1:37:12<04:53, 2.00it/s]
|
752 |
95%|ββββββββββ| 10097/10682 [1:37:12<04:52, 2.00it/s]
|
753 |
95%|ββββββββββ| 10098/10682 [1:37:13<04:52, 2.00it/s]
|
754 |
95%|ββββββββββ| 10099/10682 [1:37:13<04:51, 2.00it/s]
|
755 |
95%|ββββββββββ| 10100/10682 [1:37:14<04:51, 2.00it/s]
|
756 |
|
|
|
757 |
95%|ββββββββββ| 10100/10682 [1:37:14<04:51, 2.00it/s]
|
758 |
95%|ββββββββββ| 10101/10682 [1:37:15<05:15, 1.84it/s]
|
759 |
95%|ββββββββββ| 10102/10682 [1:37:15<05:08, 1.88it/s]
|
760 |
95%|ββββββββββ| 10103/10682 [1:37:16<05:02, 1.92it/s]
|
761 |
95%|ββββββββββ| 10104/10682 [1:37:16<04:57, 1.94it/s]
|
762 |
95%|ββββββββββ| 10105/10682 [1:37:17<04:55, 1.96it/s]
|
763 |
95%|ββββββββββ| 10106/10682 [1:37:17<04:53, 1.97it/s]
|
764 |
95%|ββββββββββ| 10107/10682 [1:37:18<04:51, 1.97it/s]
|
765 |
95%|ββββββββββ| 10108/10682 [1:37:18<04:49, 1.98it/s]
|
766 |
95%|ββββββββββ| 10109/10682 [1:37:19<04:48, 1.99it/s]
|
767 |
95%|ββββββββββ| 10110/10682 [1:37:19<04:47, 1.99it/s]
|
768 |
95%|ββββββββββ| 10111/10682 [1:37:20<04:46, 1.99it/s]
|
769 |
95%|ββββββββββ| 10112/10682 [1:37:20<04:45, 1.99it/s]
|
770 |
95%|ββββββββββ| 10113/10682 [1:37:21<04:45, 1.99it/s]
|
771 |
95%|ββββββββββ| 10114/10682 [1:37:21<04:44, 2.00it/s]
|
772 |
95%|ββββββββββ| 10115/10682 [1:37:22<04:44, 2.00it/s]
|
773 |
95%|ββββββββββ| 10116/10682 [1:37:22<04:43, 1.99it/s]
|
774 |
95%|ββββββββββ| 10117/10682 [1:37:23<04:43, 1.99it/s]
|
775 |
95%|ββββββββββ| 10118/10682 [1:37:23<04:42, 2.00it/s]
|
776 |
95%|ββββββββββ| 10119/10682 [1:37:24<04:42, 2.00it/s]
|
777 |
95%|ββββββββββ| 10120/10682 [1:37:24<04:41, 2.00it/s]
|
778 |
95%|ββββββββββ| 10121/10682 [1:37:25<04:41, 1.99it/s]
|
779 |
95%|ββββββββββ| 10122/10682 [1:37:25<04:41, 1.99it/s]
|
780 |
95%|ββββββββββ| 10123/10682 [1:37:26<04:40, 1.99it/s]
|
781 |
95%|ββββββββββ| 10124/10682 [1:37:26<04:39, 2.00it/s]
|
782 |
95%|ββββββββββ| 10125/10682 [1:37:27<04:39, 2.00it/s]{'loss': 3.1083, 'grad_norm': 0.24928326904773712, 'learning_rate': 8.261001828055447e-06, 'epoch': 13.27}
|
783 |
|
|
|
784 |
95%|ββββββββββ| 10125/10682 [1:37:27<04:39, 2.00it/s]
|
785 |
95%|ββββββββββ| 10126/10682 [1:37:27<04:38, 1.99it/s]
|
786 |
95%|ββββββββββ| 10127/10682 [1:37:28<04:38, 1.99it/s]
|
787 |
95%|ββββββββββ| 10128/10682 [1:37:28<04:37, 2.00it/s]
|
788 |
95%|ββββββββββ| 10129/10682 [1:37:29<04:36, 2.00it/s]
|
789 |
95%|ββββββββββ| 10130/10682 [1:37:29<04:36, 2.00it/s]
|
790 |
95%|ββββββββββ| 10131/10682 [1:37:30<04:36, 1.99it/s]
|
791 |
95%|ββββββββββ| 10132/10682 [1:37:30<04:36, 1.99it/s]
|
792 |
95%|ββββββββββ| 10133/10682 [1:37:31<04:35, 1.99it/s]
|
793 |
95%|ββββββββββ| 10134/10682 [1:37:31<04:34, 1.99it/s]
|
794 |
95%|ββββββββββ| 10135/10682 [1:37:32<04:33, 2.00it/s]
|
795 |
95%|ββββββββββ| 10136/10682 [1:37:32<04:33, 2.00it/s]
|
796 |
95%|ββββββββββ| 10137/10682 [1:37:33<04:32, 2.00it/s]
|
797 |
95%|ββββββββββ| 10138/10682 [1:37:33<04:32, 1.99it/s]
|
798 |
95%|ββββββββββ| 10139/10682 [1:37:34<04:32, 1.99it/s]
|
799 |
95%|ββββββββββ| 10140/10682 [1:37:34<04:32, 1.99it/s]
|
800 |
95%|ββββββββββ| 10141/10682 [1:37:35<04:31, 2.00it/s]
|
801 |
95%|ββββββββββ| 10142/10682 [1:37:35<04:30, 2.00it/s]
|
802 |
95%|ββββββββββ| 10143/10682 [1:37:36<04:30, 1.99it/s]
|
803 |
95%|ββββββββββ| 10144/10682 [1:37:36<04:29, 1.99it/s]
|
804 |
95%|ββββββββββ| 10145/10682 [1:37:37<04:29, 1.99it/s]
|
805 |
95%|ββββββββββ| 10146/10682 [1:37:37<04:28, 1.99it/s]
|
806 |
95%|ββββββββββ| 10147/10682 [1:37:38<04:27, 2.00it/s]
|
807 |
95%|ββββββββββ| 10148/10682 [1:37:38<04:27, 2.00it/s]
|
808 |
95%|ββββββββββ| 10149/10682 [1:37:39<04:27, 1.99it/s]
|
809 |
95%|ββββββββββ| 10150/10682 [1:37:39<04:27, 1.99it/s]{'loss': 3.1143, 'grad_norm': 0.2494010180234909, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.3}
|
|
|
810 |
|
811 |
95%|ββββββββββ| 10150/10682 [1:37:39<04:27, 1.99it/s]
|
812 |
95%|ββββββββββ| 10151/10682 [1:37:40<04:26, 1.99it/s]
|
813 |
95%|ββββββββββ| 10152/10682 [1:37:40<04:25, 1.99it/s]
|
814 |
95%|ββββββββββ| 10153/10682 [1:37:41<04:25, 1.99it/s]
|
815 |
95%|ββββββββββ| 10154/10682 [1:37:41<04:24, 1.99it/s]
|
816 |
95%|ββββββββββ| 10155/10682 [1:37:42<04:24, 1.99it/s]
|
817 |
95%|ββββββββββ| 10156/10682 [1:37:42<04:23, 1.99it/s]
|
818 |
95%|ββββββββββ| 10157/10682 [1:37:43<04:23, 1.99it/s]
|
819 |
95%|ββββββββββ| 10158/10682 [1:37:43<04:22, 2.00it/s]
|
820 |
95%|ββββββββββ| 10159/10682 [1:37:44<04:22, 2.00it/s]
|
821 |
95%|ββββββββββ| 10160/10682 [1:37:44<04:21, 2.00it/s]
|
822 |
95%|ββββββββββ| 10161/10682 [1:37:45<04:20, 2.00it/s]
|
823 |
95%|ββββββββββ| 10162/10682 [1:37:45<04:20, 2.00it/s]
|
824 |
95%|ββββββββββ| 10163/10682 [1:37:46<04:19, 2.00it/s]
|
825 |
95%|ββββββββββ| 10164/10682 [1:37:46<04:19, 2.00it/s]
|
826 |
95%|ββββββββββ| 10165/10682 [1:37:47<04:19, 1.99it/s]
|
827 |
95%|ββββββββββ| 10166/10682 [1:37:47<04:18, 2.00it/s]
|
828 |
95%|ββββββββββ| 10167/10682 [1:37:48<04:17, 2.00it/s]
|
829 |
95%|ββββββββββ| 10168/10682 [1:37:48<04:17, 2.00it/s]
|
830 |
95%|ββββββββββ| 10169/10682 [1:37:49<04:16, 2.00it/s]
|
831 |
95%|ββββββββββ| 10170/10682 [1:37:49<04:16, 2.00it/s]
|
832 |
95%|ββββββββββ| 10171/10682 [1:37:50<04:15, 2.00it/s]
|
833 |
95%|ββββββββββ| 10172/10682 [1:37:50<04:15, 2.00it/s]
|
834 |
95%|ββββββββββ| 10173/10682 [1:37:51<04:15, 1.99it/s]
|
835 |
95%|ββββββββββ| 10174/10682 [1:37:51<04:15, 1.99it/s]
|
836 |
95%|ββββββββββ| 10175/10682 [1:37:52<04:15, 1.99it/s]{'loss': 3.1081, 'grad_norm': 0.24675361812114716, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.34}
|
|
|
837 |
|
838 |
95%|ββββββββββ| 10175/10682 [1:37:52<04:15, 1.99it/s]
|
839 |
95%|ββββββββββ| 10176/10682 [1:37:52<04:14, 1.99it/s]
|
840 |
95%|ββββββββββ| 10177/10682 [1:37:53<04:13, 1.99it/s]
|
841 |
95%|ββββββββββ| 10178/10682 [1:37:53<04:12, 1.99it/s]
|
842 |
95%|ββββββββββ| 10179/10682 [1:37:54<04:12, 1.99it/s]
|
843 |
95%|ββββββββββ| 10180/10682 [1:37:54<04:12, 1.99it/s]
|
844 |
95%|ββββββββββ| 10181/10682 [1:37:55<04:11, 1.99it/s]
|
845 |
95%|ββββββββββ| 10182/10682 [1:37:55<04:10, 1.99it/s]
|
846 |
95%|ββββββββββ| 10183/10682 [1:37:56<04:10, 1.99it/s]
|
847 |
95%|ββββββββββ| 10184/10682 [1:37:56<04:09, 1.99it/s]
|
848 |
95%|ββββββββββ| 10185/10682 [1:37:57<04:09, 1.99it/s]
|
849 |
95%|ββββββββββ| 10186/10682 [1:37:57<04:08, 2.00it/s]
|
850 |
95%|ββββββββββ| 10187/10682 [1:37:58<04:08, 2.00it/s]
|
851 |
95%|ββββββββββ| 10188/10682 [1:37:58<04:07, 1.99it/s]
|
852 |
95%|ββββββββββ| 10189/10682 [1:37:59<04:07, 1.99it/s]
|
853 |
95%|ββββββββββ| 10190/10682 [1:37:59<04:06, 1.99it/s]
|
854 |
95%|ββββββββββ| 10191/10682 [1:38:00<04:06, 1.99it/s]
|
855 |
95%|ββββββββββ| 10192/10682 [1:38:00<04:05, 1.99it/s]
|
856 |
95%|ββββββββββ| 10193/10682 [1:38:01<04:05, 2.00it/s]
|
857 |
95%|ββββββββββ| 10194/10682 [1:38:01<04:04, 1.99it/s]
|
858 |
95%|ββββββββββ| 10195/10682 [1:38:02<04:04, 1.99it/s]
|
859 |
95%|ββββββββββ| 10196/10682 [1:38:02<04:03, 1.99it/s]
|
860 |
95%|ββββββββββ| 10197/10682 [1:38:03<04:03, 1.99it/s]
|
861 |
95%|ββββββββββ| 10198/10682 [1:38:03<04:02, 2.00it/s]
|
862 |
95%|ββββββββββ| 10199/10682 [1:38:04<04:02, 1.99it/s]
|
863 |
95%|ββββββββββ| 10200/10682 [1:38:04<04:01, 1.99it/s]{'loss': 3.1186, 'grad_norm': 0.24594959616661072, 'learning_rate': 6.190386348572108e-06, 'epoch': 13.37}
|
864 |
|
|
|
865 |
95%|ββββββββββ| 10200/10682 [1:38:04<04:01, 1.99it/s]
|
866 |
95%|ββββββββββ| 10201/10682 [1:38:05<04:01, 1.99it/s]
|
867 |
96%|ββββββββββ| 10202/10682 [1:38:05<04:00, 1.99it/s]
|
868 |
96%|ββββββββββ| 10203/10682 [1:38:06<03:59, 2.00it/s]
|
869 |
96%|ββββββββββ| 10204/10682 [1:38:06<03:59, 2.00it/s]
|
870 |
96%|ββββββββββ| 10205/10682 [1:38:07<03:58, 2.00it/s]
|
871 |
96%|ββββββββββ| 10206/10682 [1:38:07<03:58, 1.99it/s]
|
872 |
96%|ββββββββββ| 10207/10682 [1:38:08<03:58, 1.99it/s]
|
873 |
96%|ββββββββββ| 10208/10682 [1:38:08<03:57, 2.00it/s]
|
874 |
96%|ββββββββββ| 10209/10682 [1:38:09<03:56, 2.00it/s]
|
875 |
96%|ββββββββββ| 10210/10682 [1:38:09<03:56, 2.00it/s]
|
876 |
96%|ββββββββββ| 10211/10682 [1:38:10<04:16, 1.83it/s]
|
877 |
96%|ββββββββββ| 10212/10682 [1:38:10<04:10, 1.88it/s]
|
878 |
96%|ββββββββββ| 10213/10682 [1:38:11<04:05, 1.91it/s]
|
879 |
96%|ββββββββββ| 10214/10682 [1:38:11<04:01, 1.94it/s]
|
880 |
96%|ββββββββββ| 10215/10682 [1:38:12<03:58, 1.96it/s]
|
881 |
96%|ββββββββββ| 10216/10682 [1:38:12<03:57, 1.97it/s]
|
882 |
96%|ββββββββββ| 10217/10682 [1:38:13<03:55, 1.97it/s]
|
883 |
96%|ββββββββββ| 10218/10682 [1:38:13<03:54, 1.98it/s]
|
884 |
96%|ββββββββββ| 10219/10682 [1:38:14<03:53, 1.98it/s]
|
885 |
96%|ββββββββββ| 10220/10682 [1:38:14<03:52, 1.99it/s]
|
886 |
96%|ββββββββββ| 10221/10682 [1:38:15<03:51, 1.99it/s]
|
887 |
96%|ββββββββββ| 10222/10682 [1:38:15<03:51, 1.99it/s]
|
888 |
96%|ββββββββββ| 10223/10682 [1:38:16<03:50, 1.99it/s]
|
889 |
96%|ββββββββββ| 10224/10682 [1:38:16<03:50, 1.99it/s]
|
890 |
96%|ββββββββββ| 10225/10682 [1:38:17<03:49, 1.99it/s]
|
891 |
{'loss': 3.1084, 'grad_norm': 0.24636626243591309, 'learning_rate': 5.56604678228706e-06, 'epoch': 13.4}
|
|
|
892 |
96%|ββββββββββ| 10225/10682 [1:38:17<03:49, 1.99it/s]
|
893 |
96%|ββββββββββ| 10226/10682 [1:38:17<03:48, 1.99it/s]
|
894 |
96%|ββββββββββ| 10227/10682 [1:38:18<03:48, 1.99it/s]
|
895 |
96%|ββββββββββ| 10228/10682 [1:38:18<03:47, 1.99it/s]
|
896 |
96%|ββββββββββ| 10229/10682 [1:38:19<03:46, 2.00it/s]
|
897 |
96%|ββββββββββ| 10230/10682 [1:38:19<03:46, 1.99it/s]
|
898 |
96%|ββββββββββ| 10231/10682 [1:38:20<03:46, 1.99it/s]
|
899 |
96%|ββββββββββ| 10232/10682 [1:38:20<03:46, 1.99it/s]
|
900 |
96%|ββββββββββ| 10233/10682 [1:38:21<03:45, 1.99it/s]
|
901 |
96%|ββββββββββ| 10234/10682 [1:38:21<03:44, 1.99it/s]
|
902 |
96%|ββββββββββ| 10235/10682 [1:38:22<03:44, 1.99it/s]
|
903 |
96%|ββββββββββ| 10236/10682 [1:38:22<03:43, 1.99it/s]
|
904 |
96%|ββββββββββ| 10237/10682 [1:38:23<03:43, 1.99it/s]
|
905 |
96%|ββββββββββ| 10238/10682 [1:38:23<03:42, 1.99it/s]
|
906 |
96%|ββββββββββ| 10239/10682 [1:38:24<03:43, 1.98it/s]
|
907 |
96%|ββββββββββ| 10240/10682 [1:38:24<03:42, 1.99it/s]
|
908 |
96%|ββββββββββ| 10241/10682 [1:38:25<03:41, 1.99it/s]
|
909 |
96%|ββββββββββ| 10242/10682 [1:38:25<03:41, 1.99it/s]
|
910 |
96%|ββββββββββ| 10243/10682 [1:38:26<03:40, 1.99it/s]
|
911 |
96%|ββββββββββ| 10244/10682 [1:38:26<03:39, 1.99it/s]
|
912 |
96%|ββββββββββ| 10245/10682 [1:38:27<03:39, 1.99it/s]
|
913 |
96%|ββββββββββ| 10246/10682 [1:38:27<03:38, 1.99it/s]
|
914 |
96%|ββββββββββ| 10247/10682 [1:38:28<03:38, 1.99it/s]
|
915 |
96%|ββββββββββ| 10248/10682 [1:38:28<03:37, 1.99it/s]
|
916 |
96%|ββββββββββ| 10249/10682 [1:38:29<03:37, 1.99it/s]
|
917 |
96%|ββββββββββ| 10250/10682 [1:38:29<03:36, 2.00it/s]
|
918 |
{'loss': 3.1113, 'grad_norm': 0.24913303554058075, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
|
|
919 |
96%|ββββββββββ| 10250/10682 [1:38:29<03:36, 2.00it/s]
|
920 |
96%|ββββββββββ| 10251/10682 [1:38:30<03:36, 1.99it/s]
|
921 |
96%|ββββββββββ| 10252/10682 [1:38:30<03:35, 1.99it/s]
|
922 |
96%|ββββββββββ| 10253/10682 [1:38:31<03:35, 1.99it/s]
|
923 |
96%|ββββββββββ| 10254/10682 [1:38:31<03:34, 1.99it/s]
|
924 |
96%|ββββββββββ| 10255/10682 [1:38:32<03:34, 1.99it/s]
|
925 |
96%|ββββββββββ| 10256/10682 [1:38:32<03:33, 1.99it/s]
|
926 |
96%|ββββββββββ| 10257/10682 [1:38:33<03:33, 1.99it/s]
|
927 |
96%|ββββββββββ| 10258/10682 [1:38:33<03:32, 1.99it/s]
|
928 |
96%|ββββββββββ| 10259/10682 [1:38:34<03:32, 1.99it/s]
|
929 |
96%|ββββββββββ| 10260/10682 [1:38:34<03:31, 1.99it/s]
|
930 |
96%|ββββββββββ| 10261/10682 [1:38:35<03:31, 1.99it/s]
|
931 |
96%|ββββββββββ| 10262/10682 [1:38:35<03:30, 1.99it/s]
|
932 |
96%|ββββββββββ| 10263/10682 [1:38:36<03:30, 1.99it/s]
|
933 |
96%|ββββββββββ| 10264/10682 [1:38:36<03:29, 1.99it/s]
|
934 |
96%|ββββββββββ| 10265/10682 [1:38:37<03:29, 1.99it/s]
|
935 |
96%|ββββββββββ| 10266/10682 [1:38:37<03:29, 1.99it/s]
|
936 |
96%|ββββββββββ| 10267/10682 [1:38:38<03:28, 1.99it/s]
|
937 |
96%|ββββββββββ| 10268/10682 [1:38:38<03:28, 1.99it/s]
|
938 |
96%|ββββββββββ| 10269/10682 [1:38:39<03:27, 1.99it/s]
|
939 |
96%|ββββββββββ| 10270/10682 [1:38:39<03:26, 1.99it/s]
|
940 |
96%|ββββββββββ| 10271/10682 [1:38:40<03:26, 1.99it/s]
|
941 |
96%|ββββββββββ| 10272/10682 [1:38:40<03:25, 1.99it/s]
|
942 |
96%|ββββββββββ| 10273/10682 [1:38:41<03:25, 1.99it/s]
|
943 |
96%|ββββββββββ| 10274/10682 [1:38:41<03:24, 1.99it/s]
|
944 |
96%|ββββββββββ| 10275/10682 [1:38:42<03:24, 1.99it/s]
|
945 |
|
|
|
946 |
96%|ββββββββββ| 10275/10682 [1:38:42<03:24, 1.99it/s]
|
947 |
96%|ββββββββββ| 10276/10682 [1:38:42<03:23, 1.99it/s]
|
948 |
96%|ββββββββββ| 10277/10682 [1:38:43<03:23, 1.99it/s]
|
949 |
96%|ββββββββββ| 10278/10682 [1:38:43<03:22, 1.99it/s]
|
950 |
96%|ββββββββββ| 10279/10682 [1:38:44<03:22, 1.99it/s]
|
951 |
96%|ββββββββββ| 10280/10682 [1:38:44<03:21, 1.99it/s]
|
952 |
96%|ββββββββββ| 10281/10682 [1:38:45<03:21, 1.99it/s]
|
953 |
96%|ββββββββββ| 10282/10682 [1:38:45<03:20, 1.99it/s]
|
954 |
96%|ββββββββββ| 10283/10682 [1:38:46<03:20, 1.99it/s]
|
955 |
96%|ββββββββββ| 10284/10682 [1:38:46<03:19, 1.99it/s]
|
956 |
96%|ββββββββββ| 10285/10682 [1:38:47<03:19, 1.99it/s]
|
957 |
96%|ββββββββββ| 10286/10682 [1:38:47<03:18, 1.99it/s]
|
958 |
96%|ββββββββββ| 10287/10682 [1:38:48<03:18, 1.99it/s]
|
959 |
96%|ββββββββββ| 10288/10682 [1:38:48<03:17, 1.99it/s]
|
960 |
96%|ββββββββββ| 10289/10682 [1:38:49<03:17, 1.99it/s]
|
961 |
96%|ββββββββββ| 10290/10682 [1:38:50<03:17, 1.99it/s]
|
962 |
96%|ββββββββββ| 10291/10682 [1:38:50<03:16, 1.99it/s]
|
963 |
96%|ββββββββββ| 10292/10682 [1:38:51<03:15, 1.99it/s]
|
964 |
96%|ββββββββββ| 10293/10682 [1:38:51<03:15, 1.99it/s]
|
965 |
96%|ββββββββββ| 10294/10682 [1:38:52<03:14, 1.99it/s]
|
966 |
96%|ββββββββββ| 10295/10682 [1:38:52<03:14, 1.99it/s]
|
967 |
96%|ββββββββββ| 10296/10682 [1:38:53<03:13, 1.99it/s]
|
968 |
96%|ββββββββββ| 10297/10682 [1:38:53<03:13, 1.99it/s]
|
969 |
96%|ββββββββββ| 10298/10682 [1:38:54<03:12, 1.99it/s]
|
970 |
96%|ββββββββββ| 10299/10682 [1:38:54<03:12, 1.99it/s]
|
971 |
96%|ββββββββββ| 10300/10682 [1:38:55<03:11, 1.99it/s]{'loss': 3.111, 'grad_norm': 0.2491437792778015, 'learning_rate': 3.891208300917604e-06, 'epoch': 13.5}
|
|
|
972 |
|
973 |
96%|ββββββββββ| 10300/10682 [1:38:55<03:11, 1.99it/s]
|
974 |
96%|ββββββββββ| 10301/10682 [1:38:55<03:11, 1.99it/s]
|
975 |
96%|ββββββββββ| 10302/10682 [1:38:56<03:10, 1.99it/s]
|
976 |
96%|ββββββββββ| 10303/10682 [1:38:56<03:10, 1.99it/s]
|
977 |
96%|ββββββββββ| 10304/10682 [1:38:57<03:10, 1.99it/s]
|
978 |
96%|ββββββββββ| 10305/10682 [1:38:57<03:09, 1.99it/s]
|
979 |
96%|ββββββββββ| 10306/10682 [1:38:58<03:09, 1.99it/s]
|
980 |
96%|ββββββββββ| 10307/10682 [1:38:58<03:08, 1.99it/s]
|
981 |
96%|ββββββββββ| 10308/10682 [1:38:59<03:07, 1.99it/s]
|
982 |
97%|ββββββββββ| 10309/10682 [1:38:59<03:07, 1.99it/s]
|
983 |
97%|ββββββββββ| 10310/10682 [1:39:00<03:06, 1.99it/s]
|
984 |
97%|ββββββββββ| 10311/10682 [1:39:00<03:06, 1.99it/s]
|
985 |
97%|ββββββββββ| 10312/10682 [1:39:01<03:05, 1.99it/s]
|
986 |
97%|ββββββββββ| 10313/10682 [1:39:01<03:05, 1.99it/s]
|
987 |
97%|ββββββββββ| 10314/10682 [1:39:02<03:04, 1.99it/s]
|
988 |
97%|ββββββββββ| 10315/10682 [1:39:02<03:04, 1.99it/s]
|
989 |
97%|ββββββββββ| 10316/10682 [1:39:03<03:03, 1.99it/s]
|
990 |
97%|ββββββββββ| 10317/10682 [1:39:03<03:03, 1.99it/s]
|
991 |
97%|ββββββββββ| 10318/10682 [1:39:04<03:02, 1.99it/s]
|
992 |
97%|ββββββββββ| 10319/10682 [1:39:04<03:02, 1.99it/s]
|
993 |
97%|ββββββββββ| 10320/10682 [1:39:05<03:01, 1.99it/s]
|
994 |
97%|ββββββββββ| 10321/10682 [1:39:05<03:01, 1.99it/s]
|
995 |
97%|ββββββββββ| 10322/10682 [1:39:06<03:00, 1.99it/s]
|
996 |
97%|ββββββββββ| 10323/10682 [1:39:06<02:59, 1.99it/s]
|
997 |
97%|ββββββββββ| 10324/10682 [1:39:07<02:59, 1.99it/s]
|
998 |
97%|ββββββββββ| 10325/10682 [1:39:07<02:59, 1.99it/s]{'loss': 3.1165, 'grad_norm': 0.24889500439167023, 'learning_rate': 3.3991130994299734e-06, 'epoch': 13.53}
|
|
|
999 |
|
1000 |
97%|ββββββββββ| 10325/10682 [1:39:07<02:59, 1.99it/s]
|
1001 |
97%|ββββββββββ| 10326/10682 [1:39:08<02:58, 1.99it/s]
|
1002 |
97%|ββββββββββ| 10327/10682 [1:39:08<02:58, 1.99it/s]
|
1003 |
97%|ββββββββββ| 10328/10682 [1:39:09<02:57, 1.99it/s]
|
1004 |
97%|ββββββββββ| 10329/10682 [1:39:09<02:57, 1.99it/s]
|
1005 |
97%|ββββββββββ| 10330/10682 [1:39:10<02:56, 1.99it/s]
|
1006 |
97%|ββββββββββ| 10331/10682 [1:39:10<02:56, 1.99it/s]
|
1007 |
97%|ββββββββββ| 10332/10682 [1:39:11<02:55, 1.99it/s]
|
1008 |
97%|ββββββββββ| 10333/10682 [1:39:11<02:55, 1.99it/s]
|
1009 |
97%|ββββββββββ| 10334/10682 [1:39:12<02:54, 1.99it/s]
|
1010 |
97%|ββββββββββ| 10335/10682 [1:39:12<02:54, 1.99it/s]
|
1011 |
97%|ββββββββββ| 10336/10682 [1:39:13<02:53, 1.99it/s]
|
1012 |
97%|ββββββββββ| 10337/10682 [1:39:13<02:53, 1.99it/s]
|
1013 |
97%|ββββββββββ| 10338/10682 [1:39:14<02:52, 1.99it/s]
|
1014 |
97%|ββββββββββ| 10339/10682 [1:39:14<02:52, 1.99it/s]
|
1015 |
97%|ββββββββββ| 10340/10682 [1:39:15<02:51, 1.99it/s]
|
1016 |
97%|ββββββββββ| 10341/10682 [1:39:15<02:51, 1.99it/s]
|
1017 |
97%|ββββββββββ| 10342/10682 [1:39:16<02:50, 1.99it/s]
|
1018 |
97%|ββββββββββ| 10343/10682 [1:39:16<02:50, 1.99it/s]
|
1019 |
97%|ββββββββββ| 10344/10682 [1:39:17<02:49, 1.99it/s]
|
1020 |
97%|ββββββββββ| 10345/10682 [1:39:17<02:49, 1.99it/s]
|
1021 |
97%|ββββββββββ| 10346/10682 [1:39:18<02:48, 1.99it/s]
|
1022 |
97%|ββββββββββ| 10347/10682 [1:39:18<02:48, 1.99it/s]
|
1023 |
97%|ββββββββββ| 10348/10682 [1:39:19<02:47, 1.99it/s]
|
1024 |
97%|ββββββββββ| 10349/10682 [1:39:19<02:46, 1.99it/s]
|
1025 |
97%|ββββββββββ| 10350/10682 [1:39:20<02:46, 1.99it/s]{'loss': 3.1195, 'grad_norm': 0.2491111308336258, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
|
|
1026 |
|
1027 |
97%|ββββββββββ| 10350/10682 [1:39:20<02:46, 1.99it/s]
|
1028 |
97%|ββββββββββ| 10351/10682 [1:39:20<02:46, 1.99it/s]
|
1029 |
97%|ββββββββββ| 10352/10682 [1:39:21<02:45, 1.99it/s]
|
1030 |
97%|ββββββββββ| 10353/10682 [1:39:21<02:45, 1.99it/s]
|
1031 |
97%|ββββββββββ| 10354/10682 [1:39:22<02:44, 1.99it/s]
|
1032 |
97%|ββββββββββ| 10355/10682 [1:39:22<02:44, 1.99it/s]
|
1033 |
97%|ββββββββββ| 10356/10682 [1:39:23<02:43, 1.99it/s]
|
1034 |
97%|ββββββββββ| 10357/10682 [1:39:23<02:43, 1.99it/s]
|
1035 |
97%|ββββββββββ| 10358/10682 [1:39:24<02:42, 1.99it/s]
|
1036 |
97%|ββββββββββ| 10359/10682 [1:39:24<02:43, 1.98it/s]
|
1037 |
97%|ββββββββββ| 10360/10682 [1:39:25<02:42, 1.98it/s]
|
1038 |
97%|ββββββββββ| 10361/10682 [1:39:25<02:41, 1.98it/s]
|
1039 |
97%|ββββββββββ| 10362/10682 [1:39:26<02:40, 1.99it/s]
|
1040 |
97%|ββββββββββ| 10363/10682 [1:39:26<02:40, 1.99it/s]
|
1041 |
97%|ββββββββββ| 10364/10682 [1:39:27<02:39, 1.99it/s]
|
1042 |
97%|ββββββββββ| 10365/10682 [1:39:27<02:39, 1.99it/s]
|
1043 |
97%|ββββββββββ| 10366/10682 [1:39:28<02:38, 1.99it/s]
|
1044 |
97%|ββββββββββ| 10367/10682 [1:39:28<02:38, 1.99it/s]
|
1045 |
97%|ββββββββββ| 10368/10682 [1:39:29<02:37, 1.99it/s]
|
1046 |
97%|ββββββββββ| 10369/10682 [1:39:29<02:37, 1.99it/s]
|
1047 |
97%|ββββββββββ| 10370/10682 [1:39:30<02:36, 1.99it/s]
|
1048 |
97%|ββββββββββ| 10371/10682 [1:39:30<02:36, 1.99it/s]
|
1049 |
97%|ββββββββββ| 10372/10682 [1:39:31<02:35, 1.99it/s]
|
1050 |
97%|ββββββββββ| 10373/10682 [1:39:31<02:35, 1.99it/s]
|
1051 |
97%|ββββββββββ| 10374/10682 [1:39:32<02:34, 1.99it/s]
|
1052 |
97%|ββββββββββ| 10375/10682 [1:39:32<02:34, 1.99it/s]{'loss': 3.1152, 'grad_norm': 0.24760933220386505, 'learning_rate': 2.5143995351817882e-06, 'epoch': 13.6}
|
1053 |
|
|
|
1054 |
97%|ββββββββββ| 10375/10682 [1:39:32<02:34, 1.99it/s]
|
1055 |
97%|ββββββββββ| 10376/10682 [1:39:33<02:33, 1.99it/s]
|
1056 |
97%|ββββββββββ| 10377/10682 [1:39:33<02:33, 1.99it/s]
|
1057 |
97%|ββββββββββ| 10378/10682 [1:39:34<02:32, 1.99it/s]
|
1058 |
97%|ββββββββββ| 10379/10682 [1:39:34<02:32, 1.99it/s]
|
1059 |
97%|ββββββββββ| 10380/10682 [1:39:35<02:31, 1.99it/s]
|
1060 |
97%|ββββββββββ| 10381/10682 [1:39:35<02:31, 1.99it/s]
|
1061 |
97%|ββββββββββ| 10382/10682 [1:39:36<02:30, 1.99it/s]
|
1062 |
97%|ββββββββββ| 10383/10682 [1:39:36<02:29, 2.00it/s]
|
1063 |
97%|ββββββββββ| 10384/10682 [1:39:37<02:29, 1.99it/s]
|
1064 |
97%|ββββββββββ| 10385/10682 [1:39:37<02:28, 1.99it/s]
|
1065 |
97%|ββββββββββ| 10386/10682 [1:39:38<02:28, 1.99it/s]
|
1066 |
97%|ββββββββββ| 10387/10682 [1:39:38<02:27, 1.99it/s]
|
1067 |
97%|ββββββββββ| 10388/10682 [1:39:39<02:27, 1.99it/s]
|
1068 |
97%|ββββββββββ| 10389/10682 [1:39:39<02:26, 1.99it/s]
|
1069 |
97%|ββββββββββ| 10390/10682 [1:39:40<02:26, 1.99it/s]
|
1070 |
97%|ββββββββββ| 10391/10682 [1:39:40<02:25, 1.99it/s]
|
1071 |
97%|ββββββββββ| 10392/10682 [1:39:41<02:25, 1.99it/s]
|
1072 |
97%|ββββββββββ| 10393/10682 [1:39:41<02:25, 1.99it/s]
|
1073 |
97%|ββββββββββ| 10394/10682 [1:39:42<02:24, 2.00it/s]
|
1074 |
97%|ββββββββββ| 10395/10682 [1:39:42<02:23, 1.99it/s]
|
1075 |
97%|ββββββββββ| 10396/10682 [1:39:43<02:23, 1.99it/s]
|
1076 |
97%|ββββββββββ| 10397/10682 [1:39:43<02:23, 1.99it/s]
|
1077 |
97%|ββββββββββ| 10398/10682 [1:39:44<02:22, 1.99it/s]
|
1078 |
97%|ββββββββββ| 10399/10682 [1:39:44<02:21, 1.99it/s]
|
1079 |
97%|ββββββββββ| 10400/10682 [1:39:45<02:21, 2.00it/s]{'loss': 3.1212, 'grad_norm': 0.24558085203170776, 'learning_rate': 2.1218402281655835e-06, 'epoch': 13.63}
|
|
|
1080 |
|
1081 |
97%|ββββββββββ| 10400/10682 [1:39:45<02:21, 2.00it/s]
|
1082 |
97%|ββββββββββ| 10401/10682 [1:39:45<02:20, 1.99it/s]
|
1083 |
97%|ββββββββββ| 10402/10682 [1:39:46<02:20, 1.99it/s]
|
1084 |
97%|ββββββββββ| 10403/10682 [1:39:46<02:20, 1.99it/s]
|
1085 |
97%|ββββββββββ| 10404/10682 [1:39:47<02:19, 1.99it/s]
|
1086 |
97%|ββββββββββ| 10405/10682 [1:39:47<02:19, 1.99it/s]
|
1087 |
97%|ββββββββββ| 10406/10682 [1:39:48<02:18, 1.99it/s]
|
1088 |
97%|ββββββββββ| 10407/10682 [1:39:48<02:17, 1.99it/s]
|
1089 |
97%|ββββββββββ| 10408/10682 [1:39:49<02:17, 1.99it/s]
|
1090 |
97%|ββββββββββ| 10409/10682 [1:39:49<02:17, 1.99it/s]
|
1091 |
97%|ββββββββββ| 10410/10682 [1:39:50<02:16, 1.99it/s]
|
1092 |
97%|ββββββββββ| 10411/10682 [1:39:50<02:16, 1.99it/s]
|
1093 |
97%|ββββββββββ| 10412/10682 [1:39:51<02:15, 1.99it/s]
|
1094 |
97%|ββββββββββ| 10413/10682 [1:39:51<02:14, 1.99it/s]
|
1095 |
97%|ββββββββββ| 10414/10682 [1:39:52<02:14, 1.99it/s]
|
1096 |
98%|ββββββββββ| 10415/10682 [1:39:52<02:13, 1.99it/s]
|
1097 |
98%|ββββββββββ| 10416/10682 [1:39:53<02:13, 1.99it/s]
|
1098 |
98%|ββββββββββ| 10417/10682 [1:39:53<02:13, 1.99it/s]
|
1099 |
98%|ββββββββββ| 10418/10682 [1:39:54<02:12, 1.99it/s]
|
1100 |
98%|ββββββββββ| 10419/10682 [1:39:54<02:11, 1.99it/s]
|
1101 |
98%|ββββββββββ| 10420/10682 [1:39:55<02:11, 2.00it/s]
|
1102 |
98%|ββββββββββ| 10421/10682 [1:39:55<02:10, 1.99it/s]
|
1103 |
98%|ββββββββββ| 10422/10682 [1:39:56<02:10, 1.99it/s]
|
1104 |
98%|ββββββββββ| 10423/10682 [1:39:56<02:10, 1.99it/s]
|
1105 |
98%|ββββββββββ| 10424/10682 [1:39:57<02:09, 1.99it/s]
|
1106 |
98%|ββββββββββ| 10425/10682 [1:39:57<02:08, 1.99it/s]{'loss': 3.1095, 'grad_norm': 0.24991337954998016, 'learning_rate': 1.7625149152127318e-06, 'epoch': 13.66}
|
1107 |
|
|
|
1108 |
98%|ββββββββββ| 10425/10682 [1:39:57<02:08, 1.99it/s]
|
1109 |
98%|ββββββββββ| 10426/10682 [1:39:58<02:08, 1.99it/s]
|
1110 |
98%|ββββββββββ| 10427/10682 [1:39:58<02:07, 1.99it/s]
|
1111 |
98%|ββββββββββ| 10428/10682 [1:39:59<02:07, 1.99it/s]
|
1112 |
98%|ββββββββββ| 10429/10682 [1:39:59<02:07, 1.99it/s]
|
1113 |
98%|ββββββββββ| 10430/10682 [1:40:00<02:06, 1.99it/s]
|
1114 |
98%|ββββββββββ| 10431/10682 [1:40:00<02:06, 1.99it/s]
|
1115 |
98%|ββββββββββ| 10432/10682 [1:40:01<02:05, 1.99it/s]
|
1116 |
98%|ββββββββββ| 10433/10682 [1:40:01<02:04, 1.99it/s]
|
1117 |
98%|ββββββββββ| 10434/10682 [1:40:02<02:04, 1.99it/s]
|
1118 |
98%|ββββββββββ| 10435/10682 [1:40:02<02:04, 1.99it/s]
|
1119 |
98%|ββββββββββ| 10436/10682 [1:40:03<02:03, 1.99it/s]
|
1120 |
98%|ββββββββββ| 10437/10682 [1:40:03<02:03, 1.99it/s]
|
1121 |
98%|ββββββββββ| 10438/10682 [1:40:04<02:02, 1.99it/s]
|
1122 |
98%|ββββββββββ| 10439/10682 [1:40:04<02:02, 1.99it/s]
|
1123 |
98%|ββββββββββ| 10440/10682 [1:40:05<02:01, 1.99it/s]
|
1124 |
98%|ββββββββββ| 10441/10682 [1:40:05<02:01, 1.99it/s]
|
1125 |
98%|ββββββββββ| 10442/10682 [1:40:06<02:00, 1.99it/s]
|
1126 |
98%|ββββββββββ| 10443/10682 [1:40:06<02:00, 1.99it/s]
|
1127 |
98%|ββββββββββ| 10444/10682 [1:40:07<01:59, 1.99it/s]
|
1128 |
98%|ββββββββββ| 10445/10682 [1:40:07<01:58, 1.99it/s]
|
1129 |
98%|ββββββββββ| 10446/10682 [1:40:08<01:58, 1.99it/s]
|
1130 |
98%|ββββββββββ| 10447/10682 [1:40:08<01:58, 1.99it/s]
|
1131 |
98%|ββββββββββ| 10448/10682 [1:40:09<01:57, 1.99it/s]
|
1132 |
98%|ββββββββββ| 10449/10682 [1:40:09<01:57, 1.99it/s]
|
1133 |
98%|ββββββββββ| 10450/10682 [1:40:10<01:56, 1.99it/s]
|
1134 |
|
|
|
1135 |
98%|ββββββββββ| 10450/10682 [1:40:10<01:56, 1.99it/s]
|
1136 |
98%|ββββββββββ| 10451/10682 [1:40:10<01:56, 1.99it/s]
|
1137 |
98%|ββββββββββ| 10452/10682 [1:40:11<01:55, 1.99it/s]
|
1138 |
98%|ββββββββββ| 10453/10682 [1:40:11<01:54, 1.99it/s]
|
1139 |
98%|ββββββββββ| 10454/10682 [1:40:12<01:54, 1.99it/s]
|
1140 |
98%|ββββββββββ| 10455/10682 [1:40:12<01:54, 1.99it/s]
|
1141 |
98%|ββββββββββ| 10456/10682 [1:40:13<01:53, 1.99it/s]
|
1142 |
98%|ββββββββββ| 10457/10682 [1:40:13<01:52, 1.99it/s]
|
1143 |
98%|ββββββββββ| 10458/10682 [1:40:14<01:52, 1.99it/s]
|
1144 |
98%|ββββββββββ| 10459/10682 [1:40:14<01:51, 1.99it/s]
|
1145 |
98%|ββββββββββ| 10460/10682 [1:40:15<01:51, 1.99it/s]
|
1146 |
98%|ββββββββββ| 10461/10682 [1:40:15<01:50, 1.99it/s]
|
1147 |
98%|ββββββββββ| 10462/10682 [1:40:16<01:50, 2.00it/s]
|
1148 |
98%|ββββββββββ| 10463/10682 [1:40:16<01:49, 1.99it/s]
|
1149 |
98%|ββββββββββ| 10464/10682 [1:40:17<01:49, 1.99it/s]
|
1150 |
98%|ββββββββββ| 10465/10682 [1:40:17<01:48, 1.99it/s]
|
1151 |
98%|ββββββββββ| 10466/10682 [1:40:18<01:48, 1.99it/s]
|
1152 |
98%|ββββββββββ| 10467/10682 [1:40:18<01:48, 1.99it/s]
|
1153 |
98%|ββββββββββ| 10468/10682 [1:40:19<01:47, 1.99it/s]
|
1154 |
98%|ββββββββββ| 10469/10682 [1:40:19<01:46, 1.99it/s]
|
1155 |
98%|ββββββββββ| 10470/10682 [1:40:20<01:46, 1.99it/s]
|
1156 |
98%|ββββββββββ| 10471/10682 [1:40:20<01:45, 1.99it/s]
|
1157 |
98%|ββββββββββ| 10472/10682 [1:40:21<01:45, 1.99it/s]
|
1158 |
98%|ββββββββββ| 10473/10682 [1:40:21<01:44, 1.99it/s]
|
1159 |
98%|ββββββββββ| 10474/10682 [1:40:22<01:44, 1.99it/s]
|
1160 |
98%|ββββββββββ| 10475/10682 [1:40:22<01:43, 1.99it/s]
|
1161 |
|
|
|
1162 |
98%|ββββββββββ| 10475/10682 [1:40:22<01:43, 1.99it/s]
|
1163 |
98%|ββββββββββ| 10476/10682 [1:40:23<01:43, 1.99it/s]
|
1164 |
98%|ββββββββββ| 10477/10682 [1:40:23<01:42, 1.99it/s]
|
1165 |
98%|ββββββββββ| 10478/10682 [1:40:24<01:42, 1.99it/s]
|
1166 |
98%|ββββββββββ| 10479/10682 [1:40:24<01:42, 1.99it/s]
|
1167 |
98%|ββββββββββ| 10480/10682 [1:40:25<01:41, 1.99it/s]
|
1168 |
98%|ββββββββββ| 10481/10682 [1:40:25<01:40, 1.99it/s]
|
1169 |
98%|ββββββββββ| 10482/10682 [1:40:26<01:40, 1.99it/s]
|
1170 |
98%|ββββββββββ| 10483/10682 [1:40:26<01:39, 1.99it/s]
|
1171 |
98%|ββββββββββ| 10484/10682 [1:40:27<01:39, 1.99it/s]
|
1172 |
98%|ββββββββββ| 10485/10682 [1:40:27<01:38, 1.99it/s]
|
1173 |
98%|ββββββββββ| 10486/10682 [1:40:28<01:38, 1.99it/s]
|
1174 |
98%|ββββββββββ| 10487/10682 [1:40:28<01:38, 1.99it/s]
|
1175 |
98%|ββββββββββ| 10488/10682 [1:40:29<01:37, 1.99it/s]
|
1176 |
98%|ββββββββββ| 10489/10682 [1:40:29<01:36, 1.99it/s]
|
1177 |
98%|ββββββββββ| 10490/10682 [1:40:30<01:36, 1.99it/s]
|
1178 |
98%|ββββββββββ| 10491/10682 [1:40:30<01:35, 1.99it/s]
|
1179 |
98%|ββββββββββ| 10492/10682 [1:40:31<01:35, 1.99it/s]
|
1180 |
98%|ββββββββββ| 10493/10682 [1:40:31<01:34, 1.99it/s]
|
1181 |
98%|ββββββββββ| 10494/10682 [1:40:32<01:34, 1.99it/s]
|
1182 |
98%|ββββββββββ| 10495/10682 [1:40:32<01:33, 1.99it/s]
|
1183 |
98%|ββββββββββ| 10496/10682 [1:40:33<01:33, 1.99it/s]
|
1184 |
98%|ββββββββββ| 10497/10682 [1:40:33<01:32, 1.99it/s]
|
1185 |
98%|ββββββββββ| 10498/10682 [1:40:34<01:32, 2.00it/s]
|
1186 |
98%|ββββββββββ| 10499/10682 [1:40:34<01:31, 1.99it/s]
|
1187 |
98%|ββββββββββ| 10500/10682 [1:40:35<01:31, 1.99it/s]{'loss': 3.116, 'grad_norm': 0.24622003734111786, 'learning_rate': 8.841716933915555e-07, 'epoch': 13.76}
|
|
|
1188 |
|
1189 |
98%|ββββββββββ| 10500/10682 [1:40:35<01:31, 1.99it/s]
|
1190 |
98%|ββββββββββ| 10501/10682 [1:40:35<01:30, 1.99it/s]
|
1191 |
98%|ββββββββββ| 10502/10682 [1:40:36<01:30, 1.99it/s]
|
1192 |
98%|ββββββββββ| 10503/10682 [1:40:36<01:29, 1.99it/s]
|
1193 |
98%|ββββββββββ| 10504/10682 [1:40:37<01:29, 1.99it/s]
|
1194 |
98%|ββββββββββ| 10505/10682 [1:40:37<01:28, 1.99it/s]
|
1195 |
98%|ββββββββββ| 10506/10682 [1:40:38<01:28, 1.99it/s]
|
1196 |
98%|ββββββββββ| 10507/10682 [1:40:38<01:27, 1.99it/s]
|
1197 |
98%|ββββββββββ| 10508/10682 [1:40:39<01:27, 1.99it/s]
|
1198 |
98%|ββββββββββ| 10509/10682 [1:40:39<01:26, 1.99it/s]
|
1199 |
98%|ββββββββββ| 10510/10682 [1:40:40<01:26, 1.99it/s]
|
1200 |
98%|ββββββββββ| 10511/10682 [1:40:40<01:25, 1.99it/s]
|
1201 |
98%|ββββββββββ| 10512/10682 [1:40:41<01:25, 1.99it/s]
|
1202 |
98%|ββββββββββ| 10513/10682 [1:40:41<01:24, 1.99it/s]
|
1203 |
98%|ββββββββββ| 10514/10682 [1:40:42<01:24, 1.99it/s]
|
1204 |
98%|ββββββββββ| 10515/10682 [1:40:42<01:23, 1.99it/s]
|
1205 |
98%|ββββββββββ| 10516/10682 [1:40:43<01:23, 1.99it/s]
|
1206 |
98%|ββββββββββ| 10517/10682 [1:40:43<01:22, 1.99it/s]
|
1207 |
98%|ββββββββββ| 10518/10682 [1:40:44<01:22, 1.99it/s]
|
1208 |
98%|ββββββββββ| 10519/10682 [1:40:44<01:21, 1.99it/s]
|
1209 |
98%|ββββββββββ| 10520/10682 [1:40:45<01:21, 1.99it/s]
|
1210 |
98%|ββββββββββ| 10521/10682 [1:40:45<01:20, 1.99it/s]
|
1211 |
99%|ββββββββββ| 10522/10682 [1:40:46<01:20, 1.99it/s]
|
1212 |
99%|ββββββββββ| 10523/10682 [1:40:46<01:19, 1.99it/s]
|
1213 |
99%|ββββββββββ| 10524/10682 [1:40:47<01:19, 1.99it/s]
|
1214 |
99%|ββββββββββ| 10525/10682 [1:40:47<01:18, 1.99it/s]{'loss': 3.1108, 'grad_norm': 0.24499115347862244, 'learning_rate': 6.580000036264244e-07, 'epoch': 13.79}
|
|
|
1215 |
|
1216 |
99%|ββββββββββ| 10525/10682 [1:40:47<01:18, 1.99it/s]
|
1217 |
99%|ββββββββββ| 10526/10682 [1:40:48<01:18, 1.99it/s]
|
1218 |
99%|ββββββββββ| 10527/10682 [1:40:48<01:17, 1.99it/s]
|
1219 |
99%|ββββββββββ| 10528/10682 [1:40:49<01:17, 1.99it/s]
|
1220 |
99%|ββββββββββ| 10529/10682 [1:40:50<01:16, 1.99it/s]
|
1221 |
99%|ββββββββββ| 10530/10682 [1:40:50<01:16, 1.99it/s]
|
1222 |
99%|ββββββββββ| 10531/10682 [1:40:51<01:15, 1.99it/s]
|
1223 |
99%|ββββββββββ| 10532/10682 [1:40:51<01:15, 1.99it/s]
|
1224 |
99%|ββββββββββ| 10533/10682 [1:40:52<01:14, 1.99it/s]
|
1225 |
99%|ββββββββββ| 10534/10682 [1:40:52<01:14, 1.99it/s]
|
1226 |
99%|ββββββββββ| 10535/10682 [1:40:53<01:13, 1.99it/s]
|
1227 |
99%|ββββββββββ| 10536/10682 [1:40:53<01:13, 1.99it/s]
|
1228 |
99%|ββββββββββ| 10537/10682 [1:40:54<01:12, 1.99it/s]
|
1229 |
99%|ββββββββββ| 10538/10682 [1:40:54<01:12, 1.99it/s]
|
1230 |
99%|ββββββββββ| 10539/10682 [1:40:55<01:11, 1.99it/s]
|
1231 |
99%|ββββββββββ| 10540/10682 [1:40:55<01:11, 2.00it/s]
|
1232 |
99%|ββββββββββ| 10541/10682 [1:40:56<01:10, 2.00it/s]
|
1233 |
99%|ββββββββββ| 10542/10682 [1:40:56<01:10, 1.99it/s]
|
1234 |
99%|ββββββββββ| 10543/10682 [1:40:57<01:09, 1.99it/s]
|
1235 |
99%|ββββββββββ| 10544/10682 [1:40:57<01:09, 1.99it/s]
|
1236 |
99%|ββββββββββ| 10545/10682 [1:40:58<01:08, 1.99it/s]
|
1237 |
99%|ββββββββββ| 10546/10682 [1:40:58<01:08, 1.99it/s]
|
1238 |
99%|ββββββββββ| 10547/10682 [1:40:59<01:07, 1.99it/s]
|
1239 |
99%|ββββββββββ| 10548/10682 [1:40:59<01:07, 1.99it/s]
|
1240 |
99%|ββββββββββ| 10549/10682 [1:41:00<01:06, 1.99it/s]
|
1241 |
99%|ββββββββββ| 10550/10682 [1:41:00<01:06, 1.99it/s]
|
1242 |
{'loss': 3.1109, 'grad_norm': 0.24714048206806183, 'learning_rate': 4.651600211027507e-07, 'epoch': 13.83}
|
|
|
1243 |
99%|ββββββββββ| 10550/10682 [1:41:00<01:06, 1.99it/s]
|
1244 |
99%|ββββββββββ| 10551/10682 [1:41:01<01:05, 1.99it/s]
|
1245 |
99%|ββββββββββ| 10552/10682 [1:41:01<01:05, 1.99it/s]
|
1246 |
99%|ββββββββββ| 10553/10682 [1:41:02<01:04, 1.99it/s]
|
1247 |
99%|ββββββββββ| 10554/10682 [1:41:02<01:04, 1.99it/s]
|
1248 |
99%|ββββββββββ| 10555/10682 [1:41:03<01:03, 1.99it/s]
|
1249 |
99%|ββββββββββ| 10556/10682 [1:41:03<01:03, 1.99it/s]
|
1250 |
99%|ββββββββββ| 10557/10682 [1:41:04<01:02, 1.99it/s]
|
1251 |
99%|ββββββββββ| 10558/10682 [1:41:04<01:02, 1.99it/s]
|
1252 |
99%|ββββββββββ| 10559/10682 [1:41:05<01:01, 1.99it/s]
|
1253 |
99%|ββββββββββ| 10560/10682 [1:41:05<01:01, 1.99it/s]
|
1254 |
99%|ββββββββββ| 10561/10682 [1:41:06<01:00, 1.99it/s]
|
1255 |
99%|ββββββββββ| 10562/10682 [1:41:06<01:00, 1.99it/s]
|
1256 |
99%|ββββββββββ| 10563/10682 [1:41:07<00:59, 1.99it/s]
|
1257 |
99%|ββββββββββ| 10564/10682 [1:41:07<00:59, 1.99it/s]
|
1258 |
99%|ββββββββββ| 10565/10682 [1:41:08<00:58, 1.99it/s]
|
1259 |
99%|ββββββββββ| 10566/10682 [1:41:08<00:58, 1.99it/s]
|
1260 |
99%|ββββββββββ| 10567/10682 [1:41:09<00:57, 1.99it/s]
|
1261 |
99%|ββββββββββ| 10568/10682 [1:41:09<00:57, 1.99it/s]
|
1262 |
99%|ββββββββββ| 10569/10682 [1:41:10<00:56, 1.99it/s]
|
1263 |
99%|ββββββββββ| 10570/10682 [1:41:10<00:56, 1.99it/s]
|
1264 |
99%|ββββββββββ| 10571/10682 [1:41:11<00:55, 1.99it/s]
|
1265 |
99%|ββββββββββ| 10572/10682 [1:41:11<00:55, 1.99it/s]
|
1266 |
99%|ββββββββββ| 10573/10682 [1:41:12<00:54, 1.99it/s]
|
1267 |
99%|ββββββββββ| 10574/10682 [1:41:12<00:54, 1.99it/s]
|
1268 |
99%|ββββββββββ| 10575/10682 [1:41:13<00:53, 1.99it/s]{'loss': 3.1055, 'grad_norm': 0.24866575002670288, 'learning_rate': 3.0566461813213986e-07, 'epoch': 13.86}
|
|
|
1269 |
|
1270 |
99%|ββββββββββ| 10575/10682 [1:41:13<00:53, 1.99it/s]
|
1271 |
99%|ββββββββββ| 10576/10682 [1:41:13<00:53, 1.99it/s]
|
1272 |
99%|ββββββββββ| 10577/10682 [1:41:14<00:52, 1.99it/s]
|
1273 |
99%|ββββββββββ| 10578/10682 [1:41:14<00:52, 1.99it/s]
|
1274 |
99%|ββββββββββ| 10579/10682 [1:41:15<00:51, 1.99it/s]
|
1275 |
99%|ββββββββββ| 10580/10682 [1:41:15<00:51, 1.99it/s]
|
1276 |
99%|ββββββββββ| 10581/10682 [1:41:16<00:50, 1.99it/s]
|
1277 |
99%|ββββββββββ| 10582/10682 [1:41:16<00:50, 1.99it/s]
|
1278 |
99%|ββββββββββ| 10583/10682 [1:41:17<00:49, 1.99it/s]
|
1279 |
99%|ββββββββββ| 10584/10682 [1:41:17<00:49, 1.99it/s]
|
1280 |
99%|ββββββββββ| 10585/10682 [1:41:18<00:48, 1.99it/s]
|
1281 |
99%|ββββββββββ| 10586/10682 [1:41:18<00:48, 1.99it/s]
|
1282 |
99%|ββββββββββ| 10587/10682 [1:41:19<00:47, 1.99it/s]
|
1283 |
99%|ββββββββββ| 10588/10682 [1:41:19<00:47, 1.99it/s]
|
1284 |
99%|ββββββββββ| 10589/10682 [1:41:20<00:46, 1.99it/s]
|
1285 |
99%|ββββββββββ| 10590/10682 [1:41:20<00:46, 1.99it/s]
|
1286 |
99%|ββββββββββ| 10591/10682 [1:41:21<00:45, 1.99it/s]
|
1287 |
99%|ββββββββββ| 10592/10682 [1:41:21<00:45, 1.99it/s]
|
1288 |
99%|ββββββββββ| 10593/10682 [1:41:22<00:44, 1.99it/s]
|
1289 |
99%|ββββββββββ| 10594/10682 [1:41:22<00:44, 1.99it/s]
|
1290 |
99%|ββββββββββ| 10595/10682 [1:41:23<00:43, 1.99it/s]
|
1291 |
99%|ββββββββββ| 10596/10682 [1:41:23<00:43, 1.99it/s]
|
1292 |
99%|ββββββββββ| 10597/10682 [1:41:24<00:42, 1.99it/s]
|
1293 |
99%|ββββββββββ| 10598/10682 [1:41:24<00:42, 1.97it/s]
|
1294 |
99%|ββββββββββ| 10599/10682 [1:41:25<00:41, 1.98it/s]
|
1295 |
99%|ββββββββββ| 10600/10682 [1:41:25<00:41, 1.98it/s]
|
1296 |
|
|
|
1297 |
99%|ββββββββββ| 10600/10682 [1:41:25<00:41, 1.98it/s]
|
1298 |
99%|ββββββββββ| 10601/10682 [1:41:26<00:40, 1.99it/s]
|
1299 |
99%|ββββββββββ| 10602/10682 [1:41:26<00:40, 1.99it/s]
|
1300 |
99%|ββββββββββ| 10603/10682 [1:41:27<00:39, 1.99it/s]
|
1301 |
99%|ββββββββββ| 10604/10682 [1:41:27<00:39, 1.99it/s]
|
1302 |
99%|ββββββββββ| 10605/10682 [1:41:28<00:38, 1.99it/s]
|
1303 |
99%|ββββββββββ| 10606/10682 [1:41:28<00:38, 1.99it/s]
|
1304 |
99%|ββββββββββ| 10607/10682 [1:41:29<00:37, 1.99it/s]
|
1305 |
99%|ββββββββββ| 10608/10682 [1:41:29<00:37, 2.00it/s]
|
1306 |
99%|ββββββββββ| 10609/10682 [1:41:30<00:36, 2.00it/s]
|
1307 |
99%|ββββββββββ| 10610/10682 [1:41:30<00:36, 2.00it/s]
|
1308 |
99%|ββββββββββ| 10611/10682 [1:41:31<00:35, 1.99it/s]
|
1309 |
99%|ββββββββββ| 10612/10682 [1:41:31<00:35, 2.00it/s]
|
1310 |
99%|ββββββββββ| 10613/10682 [1:41:32<00:34, 2.00it/s]
|
1311 |
99%|ββββββββββ| 10614/10682 [1:41:32<00:34, 1.99it/s]
|
1312 |
99%|ββββββββββ| 10615/10682 [1:41:33<00:33, 1.99it/s]
|
1313 |
99%|ββββββββββ| 10616/10682 [1:41:33<00:33, 1.99it/s]
|
1314 |
99%|ββββββββββ| 10617/10682 [1:41:34<00:32, 1.99it/s]
|
1315 |
99%|ββββββββββ| 10618/10682 [1:41:34<00:32, 1.99it/s]
|
1316 |
99%|ββββββββββ| 10619/10682 [1:41:35<00:31, 1.99it/s]
|
1317 |
99%|ββββββββββ| 10620/10682 [1:41:35<00:31, 1.99it/s]
|
1318 |
99%|ββββββββββ| 10621/10682 [1:41:36<00:30, 1.99it/s]
|
1319 |
99%|ββββββββββ| 10622/10682 [1:41:36<00:30, 1.99it/s]
|
1320 |
99%|ββββββββββ| 10623/10682 [1:41:37<00:29, 1.99it/s]
|
1321 |
99%|ββββββββββ| 10624/10682 [1:41:37<00:29, 2.00it/s]
|
1322 |
99%|ββββββββββ| 10625/10682 [1:41:38<00:28, 1.99it/s]{'loss': 3.1138, 'grad_norm': 0.24934372305870056, 'learning_rate': 8.674791042273533e-08, 'epoch': 13.93}
|
1323 |
|
|
|
1324 |
99%|ββββββββββ| 10625/10682 [1:41:38<00:28, 1.99it/s]
|
1325 |
99%|ββββββββββ| 10626/10682 [1:41:38<00:28, 1.99it/s]
|
1326 |
99%|ββββββββββ| 10627/10682 [1:41:39<00:27, 1.99it/s]
|
1327 |
99%|ββββββββββ| 10628/10682 [1:41:39<00:27, 2.00it/s]
|
1328 |
|
|
|
1329 |
|
|
|
1330 |
|
|
|
|
|
|
502 |
|
503 |
|
504 |
92%|ββββββββββ| 9875/10682 [1:33:52<06:46, 1.99it/s]
|
505 |
92%|ββββββββββ| 9876/10682 [1:33:52<06:45, 1.99it/s]
|
506 |
92%|ββββββββββ| 9877/10682 [1:33:53<06:43, 1.99it/s]
|
507 |
92%|ββββββββββ| 9878/10682 [1:33:53<06:43, 1.99it/s]
|
508 |
92%|ββββββββββ| 9879/10682 [1:33:54<06:43, 1.99it/s]
|
509 |
92%|ββββββββββ| 9880/10682 [1:33:54<06:43, 1.99it/s]
|
510 |
93%|ββββββββββ| 9881/10682 [1:33:55<06:43, 1.99it/s]
|
511 |
93%|ββββββββββ| 9882/10682 [1:33:55<06:42, 1.99it/s]
|
512 |
93%|ββββββββββ| 9883/10682 [1:33:56<06:41, 1.99it/s]
|
513 |
93%|ββββββββββ| 9884/10682 [1:33:56<06:40, 1.99it/s]
|
514 |
93%|ββββββββββ| 9885/10682 [1:33:57<06:40, 1.99it/s]
|
515 |
93%|ββββββββββ| 9886/10682 [1:33:57<06:39, 1.99it/s]
|
516 |
93%|ββββββββββ| 9887/10682 [1:33:58<06:39, 1.99it/s]
|
517 |
93%|ββββββββββ| 9888/10682 [1:33:58<06:39, 1.99it/s]
|
518 |
93%|ββββββββββ| 9889/10682 [1:33:59<06:38, 1.99it/s]
|
519 |
93%|ββββββββββ| 9890/10682 [1:33:59<06:38, 1.99it/s]
|
520 |
93%|ββββββββββ| 9891/10682 [1:34:00<06:37, 1.99it/s]
|
521 |
93%|ββββββββββ| 9892/10682 [1:34:00<06:36, 1.99it/s]
|
522 |
93%|ββββββββββ| 9893/10682 [1:34:01<06:36, 1.99it/s]
|
523 |
93%|ββββββββββ| 9894/10682 [1:34:01<06:36, 1.99it/s]
|
524 |
93%|ββββββββββ| 9895/10682 [1:34:02<06:35, 1.99it/s]
|
525 |
93%|ββββββββββ| 9896/10682 [1:34:02<06:34, 1.99it/s]
|
526 |
93%|ββββββββββ| 9897/10682 [1:34:03<06:34, 1.99it/s]
|
527 |
93%|ββββββββββ| 9898/10682 [1:34:03<06:34, 1.99it/s]
|
528 |
93%|ββββββββββ| 9899/10682 [1:34:04<06:33, 1.99it/s]
|
529 |
93%|ββββββββββ| 9900/10682 [1:34:04<06:32, 1.99it/s]{'loss': 3.1411, 'grad_norm': 0.2500966787338257, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.98}
|
530 |
|
531 |
|
532 |
93%|ββββββββββ| 9900/10682 [1:34:04<06:32, 1.99it/s]
|
533 |
93%|ββββββββββ| 9901/10682 [1:34:05<06:32, 1.99it/s]
|
534 |
93%|ββββββββββ| 9902/10682 [1:34:05<06:31, 1.99it/s]
|
535 |
93%|ββββββββββ| 9903/10682 [1:34:06<06:31, 1.99it/s]
|
536 |
93%|ββββββββββ| 9904/10682 [1:34:06<06:31, 1.99it/s]
|
537 |
93%|ββββββββββ| 9905/10682 [1:34:07<06:29, 1.99it/s]
|
538 |
93%|ββββββββββ| 9906/10682 [1:34:07<06:28, 2.00it/s]
|
539 |
93%|ββββββββββ| 9907/10682 [1:34:08<06:28, 1.99it/s]
|
540 |
93%|ββββββββββ| 9908/10682 [1:34:08<06:28, 1.99it/s]
|
541 |
93%|ββββββββββ| 9909/10682 [1:34:09<06:27, 1.99it/s]
|
542 |
93%|ββββββββββ| 9910/10682 [1:34:09<06:27, 1.99it/s]
|
543 |
93%|ββββββββββ| 9911/10682 [1:34:10<06:26, 2.00it/s]
|
544 |
93%|ββββββββββ| 9912/10682 [1:34:10<06:25, 2.00it/s]
|
545 |
93%|ββββββββββ| 9913/10682 [1:34:11<06:25, 1.99it/s]
|
546 |
93%|ββββββββββ| 9914/10682 [1:34:11<06:25, 1.99it/s]
|
547 |
93%|ββββββββββ| 9915/10682 [1:34:12<06:24, 2.00it/s]
|
548 |
93%|ββββββββββ| 9916/10682 [1:34:12<06:23, 2.00it/s]
|
549 |
93%|ββββββββββ| 9917/10682 [1:34:13<06:22, 2.00it/s]
|
550 |
93%|ββββββββββ| 9918/10682 [1:34:13<06:22, 2.00it/s]
|
551 |
93%|ββββββββββ| 9919/10682 [1:34:14<06:17, 2.02it/s]
|
552 |
93%|ββββββββββ| 9920/10682 [1:35:44<5:47:38, 27.37s/it]
|
553 |
93%|ββββββββββ| 9921/10682 [1:35:44<4:04:56, 19.31s/it]
|
554 |
93%|ββββββββββ| 9922/10682 [1:35:45<2:53:07, 13.67s/it]
|
555 |
93%|ββββββββββ| 9923/10682 [1:35:45<2:02:58, 9.72s/it]
|
556 |
93%|ββββββββββ| 9924/10682 [1:35:46<1:27:51, 6.96s/it]
|
557 |
93%|ββββββββββ| 9925/10682 [1:35:46<1:03:19, 5.02s/it]{'loss': 3.1493, 'grad_norm': 0.24704374372959137, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.01}
|
558 |
|
559 |
|
560 |
93%|ββββββββββ| 9925/10682 [1:35:46<1:03:19, 5.02s/it]
|
561 |
93%|ββββββββββ| 9926/10682 [1:35:47<46:09, 3.66s/it]
|
562 |
93%|ββββββββββ| 9927/10682 [1:35:47<34:09, 2.71s/it]
|
563 |
93%|ββββββββββ| 9928/10682 [1:35:48<25:46, 2.05s/it]
|
564 |
93%|ββββββββββ| 9929/10682 [1:35:48<19:53, 1.59s/it]
|
565 |
93%|ββββββββββ| 9930/10682 [1:35:49<15:47, 1.26s/it]
|
566 |
93%|ββββββββββ| 9931/10682 [1:35:49<12:55, 1.03s/it]
|
567 |
93%|ββββββββββ| 9932/10682 [1:35:50<10:54, 1.15it/s]
|
568 |
93%|ββββββββββ| 9933/10682 [1:35:50<09:30, 1.31it/s]
|
569 |
93%|ββββββββββ| 9934/10682 [1:35:51<08:30, 1.47it/s]
|
570 |
93%|βββββββοΏ½οΏ½οΏ½ββ| 9935/10682 [1:35:51<07:49, 1.59it/s]
|
571 |
93%|ββββββββββ| 9936/10682 [1:35:52<07:19, 1.70it/s]
|
572 |
93%|ββββββββββ| 9937/10682 [1:35:52<06:58, 1.78it/s]
|
573 |
93%|ββββββββββ| 9938/10682 [1:35:53<06:44, 1.84it/s]
|
574 |
93%|ββββββββββ| 9939/10682 [1:35:53<06:34, 1.88it/s]
|
575 |
93%|ββββββββββ| 9940/10682 [1:35:54<06:27, 1.92it/s]
|
576 |
93%|ββββββββββ| 9941/10682 [1:35:54<06:21, 1.94it/s]
|
577 |
93%|ββββββββββ| 9942/10682 [1:35:55<06:17, 1.96it/s]
|
578 |
93%|ββββββββββ| 9943/10682 [1:35:55<06:15, 1.97it/s]
|
579 |
93%|ββββββββββ| 9944/10682 [1:35:56<06:13, 1.98it/s]
|
580 |
93%|ββββββββββ| 9945/10682 [1:35:56<06:11, 1.98it/s]
|
581 |
93%|ββββββββββ| 9946/10682 [1:35:57<06:09, 1.99it/s]
|
582 |
93%|ββββββββββ| 9947/10682 [1:35:57<06:09, 1.99it/s]
|
583 |
93%|ββββββββββ| 9948/10682 [1:35:58<06:08, 1.99it/s]
|
584 |
93%|ββββββββββ| 9949/10682 [1:35:58<06:07, 2.00it/s]
|
585 |
93%|ββββββββββ| 9950/10682 [1:35:59<06:06, 2.00it/s]{'loss': 3.1011, 'grad_norm': 0.24837680160999298, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.04}
|
586 |
|
|
|
587 |
93%|ββββββββββ| 9950/10682 [1:35:59<06:06, 2.00it/s]
|
588 |
93%|ββββββββββ| 9951/10682 [1:35:59<06:06, 2.00it/s]
|
589 |
93%|ββββββββββ| 9952/10682 [1:36:00<06:05, 2.00it/s]
|
590 |
93%|ββββββββββ| 9953/10682 [1:36:00<06:05, 2.00it/s]
|
591 |
93%|ββββββββββ| 9954/10682 [1:36:01<06:04, 2.00it/s]
|
592 |
93%|ββββββββββ| 9955/10682 [1:36:01<06:03, 2.00it/s]
|
593 |
93%|ββββββββββ| 9956/10682 [1:36:02<06:03, 2.00it/s]
|
594 |
+
|
595 |
93%|ββββββββββ| 9950/10682 [1:35:59<06:06, 2.00it/s]
|
596 |
93%|ββββββββββ| 9951/10682 [1:35:59<06:06, 2.00it/s]
|
597 |
93%|ββββββββββ| 9952/10682 [1:36:00<06:05, 2.00it/s]
|
598 |
93%|ββββββββββ| 9953/10682 [1:36:00<06:05, 2.00it/s]
|
599 |
93%|ββββββββββ| 9954/10682 [1:36:01<06:04, 2.00it/s]
|
600 |
93%|ββββββββββ| 9955/10682 [1:36:01<06:03, 2.00it/s]
|
601 |
93%|ββββββββββ| 9956/10682 [1:36:02<06:03, 2.00it/s]
|
602 |
93%|ββββββββββ| 9957/10682 [1:36:02<06:03, 1.99it/s]
|
603 |
93%|ββββββββββ| 9958/10682 [1:36:03<06:02, 2.00it/s]
|
604 |
93%|ββββββββββ| 9959/10682 [1:36:03<06:01, 2.00it/s]
|
605 |
93%|ββββββββββ| 9960/10682 [1:36:04<06:01, 2.00it/s]
|
606 |
93%|ββββββββββ| 9961/10682 [1:36:04<06:01, 2.00it/s]
|
607 |
93%|ββββββββββ| 9962/10682 [1:36:05<06:00, 2.00it/s]
|
608 |
93%|ββββββββββ| 9963/10682 [1:36:05<05:59, 2.00it/s]
|
609 |
93%|ββββββββββ| 9964/10682 [1:36:06<05:59, 2.00it/s]
|
610 |
93%|ββββββββββ| 9965/10682 [1:36:06<05:58, 2.00it/s]
|
611 |
93%|ββββββββββ| 9966/10682 [1:36:07<05:58, 2.00it/s]
|
612 |
93%|ββββββββββ| 9967/10682 [1:36:07<05:57, 2.00it/s]
|
613 |
93%|ββββββββββ| 9968/10682 [1:36:08<05:57, 2.00it/s]
|
614 |
93%|ββββββββββ| 9969/10682 [1:36:08<05:56, 2.00it/s]
|
615 |
93%|ββββββββββ| 9970/10682 [1:36:09<05:56, 2.00it/s]
|
616 |
93%|ββββββββββ| 9971/10682 [1:36:09<05:56, 2.00it/s]
|
617 |
93%|ββββββββββ| 9972/10682 [1:36:10<05:55, 2.00it/s]
|
618 |
93%|ββββββββββ| 9973/10682 [1:36:10<05:54, 2.00it/s]
|
619 |
93%|ββββββββββ| 9974/10682 [1:36:11<05:54, 2.00it/s]
|
620 |
93%|ββββββββββ| 9975/10682 [1:36:11<05:53, 2.00it/s]
|
621 |
{'loss': 3.1115, 'grad_norm': 0.24749550223350525, 'learning_rate': 1.3287025325307511e-05, 'epoch': 13.07}
|
622 |
+
|
623 |
93%|ββββββββββ| 9975/10682 [1:36:11<05:53, 2.00it/s]
|
624 |
93%|ββββββββββ| 9976/10682 [1:36:12<05:53, 2.00it/s]
|
625 |
93%|ββββββββββ| 9977/10682 [1:36:12<05:52, 2.00it/s]
|
626 |
93%|ββββββββββ| 9978/10682 [1:36:13<05:52, 2.00it/s]
|
627 |
93%|ββββββββββ| 9979/10682 [1:36:13<05:51, 2.00it/s]
|
628 |
93%|ββββββββββ| 9980/10682 [1:36:14<05:51, 2.00it/s]
|
629 |
93%|ββββββββββ| 9981/10682 [1:36:14<05:51, 2.00it/s]
|
630 |
93%|ββββββββββ| 9982/10682 [1:36:15<05:50, 2.00it/s]
|
631 |
93%|ββββββββββ| 9983/10682 [1:36:15<05:50, 2.00it/s]
|
632 |
93%|ββββββββββ| 9984/10682 [1:36:16<05:49, 2.00it/s]
|
633 |
93%|ββββββββββ| 9985/10682 [1:36:16<05:48, 2.00it/s]
|
634 |
93%|ββββββββββ| 9986/10682 [1:36:17<05:48, 2.00it/s]
|
635 |
93%|ββββββββββ| 9987/10682 [1:36:17<05:48, 2.00it/s]
|
636 |
94%|ββββββββββ| 9988/10682 [1:36:18<05:48, 1.99it/s]
|
637 |
94%|ββββββββββ| 9989/10682 [1:36:18<05:47, 1.99it/s]
|
638 |
94%|ββββββββββ| 9990/10682 [1:36:19<05:46, 1.99it/s]
|
639 |
94%|ββββββββββ| 9991/10682 [1:36:19<05:45, 2.00it/s]
|
640 |
94%|ββββββββββ| 9992/10682 [1:36:20<05:45, 2.00it/s]
|
641 |
94%|ββββββββββ| 9993/10682 [1:36:20<05:45, 2.00it/s]
|
642 |
94%|ββββββββββ| 9994/10682 [1:36:21<05:45, 1.99it/s]
|
643 |
94%|ββββββββββ| 9995/10682 [1:36:21<05:45, 1.99it/s]
|
644 |
94%|ββββββββββ| 9996/10682 [1:36:22<05:44, 1.99it/s]
|
645 |
94%|ββββββββββ| 9997/10682 [1:36:22<05:43, 2.00it/s]
|
646 |
94%|ββββββββββ| 9998/10682 [1:36:23<05:42, 2.00it/s]
|
647 |
94%|ββββββββββ| 9999/10682 [1:36:23<05:42, 1.99it/s]
|
648 |
94%|ββββββββββ| 10000/10682 [1:36:24<05:42, 1.99it/s]
|
649 |
|
650 |
+
|
651 |
94%|ββββββββββ| 10000/10682 [1:36:24<05:42, 1.99it/s]
|
652 |
94%|ββββββββββ| 10001/10682 [1:36:24<05:41, 1.99it/s]
|
653 |
94%|ββββββββββ| 10002/10682 [1:36:25<05:40, 2.00it/s]
|
654 |
94%|ββββββββββ| 10003/10682 [1:36:25<05:39, 2.00it/s]
|
655 |
94%|ββββββββββ| 10004/10682 [1:36:26<05:40, 1.99it/s]
|
656 |
94%|ββββββββββ| 10005/10682 [1:36:26<05:39, 1.99it/s]
|
657 |
94%|ββββββββββ| 10006/10682 [1:36:27<05:39, 1.99it/s]
|
658 |
94%|ββββββββββ| 10007/10682 [1:36:27<05:38, 2.00it/s]
|
659 |
94%|ββββββββββ| 10008/10682 [1:36:28<05:37, 2.00it/s]
|
660 |
94%|ββββββββββ| 10009/10682 [1:36:28<05:36, 2.00it/s]
|
661 |
94%|ββββββββββ| 10010/10682 [1:36:29<05:36, 1.99it/s]
|
662 |
94%|ββββββββββ| 10011/10682 [1:36:29<05:36, 2.00it/s]
|
663 |
94%|ββββββββββ| 10012/10682 [1:36:30<05:35, 2.00it/s]
|
664 |
94%|ββββββββββ| 10013/10682 [1:36:30<05:34, 2.00it/s]
|
665 |
94%|ββββββββββ| 10014/10682 [1:36:31<05:34, 2.00it/s]
|
666 |
94%|ββββββββββ| 10015/10682 [1:36:31<05:34, 1.99it/s]
|
667 |
94%|ββββββββββ| 10016/10682 [1:36:32<05:33, 1.99it/s]
|
668 |
94%|ββββββββββ| 10017/10682 [1:36:32<05:32, 2.00it/s]
|
669 |
94%|ββββββββββ| 10018/10682 [1:36:33<05:32, 2.00it/s]
|
670 |
94%|ββββββββββ| 10019/10682 [1:36:33<05:32, 2.00it/s]
|
671 |
94%|ββββββββββ| 10020/10682 [1:36:34<05:31, 1.99it/s]
|
672 |
94%|ββββββββββ| 10021/10682 [1:36:34<05:31, 1.99it/s]
|
673 |
94%|ββββββββββ| 10022/10682 [1:36:35<05:31, 1.99it/s]
|
674 |
94%|ββββββββββ| 10023/10682 [1:36:35<05:30, 1.99it/s]
|
675 |
94%|ββββββββββ| 10024/10682 [1:36:36<05:29, 2.00it/s]
|
676 |
94%|ββββββββββ| 10025/10682 [1:36:36<05:29, 2.00it/s]
|
677 |
{'loss': 3.1075, 'grad_norm': 0.24997055530548096, 'learning_rate': 1.1481100210606388e-05, 'epoch': 13.14}
|
678 |
+
|
679 |
94%|ββββββββββ| 10025/10682 [1:36:36<05:29, 2.00it/s]
|
680 |
94%|ββββββββββ| 10026/10682 [1:36:37<05:29, 1.99it/s]
|
681 |
94%|ββββββββββ| 10027/10682 [1:36:37<05:28, 1.99it/s]
|
682 |
94%|ββββββββββ| 10028/10682 [1:36:38<05:27, 1.99it/s]
|
683 |
94%|ββββββββββ| 10029/10682 [1:36:38<05:26, 2.00it/s]
|
684 |
94%|ββββββββββ| 10030/10682 [1:36:39<05:26, 2.00it/s]
|
685 |
94%|ββββββββββ| 10031/10682 [1:36:39<05:26, 2.00it/s]
|
686 |
94%|ββββββββββ| 10032/10682 [1:36:40<05:25, 1.99it/s]
|
687 |
94%|ββββββββββ| 10033/10682 [1:36:40<05:25, 2.00it/s]
|
688 |
94%|ββββββββββ| 10034/10682 [1:36:41<05:24, 1.99it/s]
|
689 |
94%|ββββββββββ| 10035/10682 [1:36:41<05:24, 1.99it/s]
|
690 |
94%|ββββββββββ| 10036/10682 [1:36:42<05:23, 1.99it/s]
|
691 |
94%|ββββββββββ| 10037/10682 [1:36:42<05:23, 2.00it/s]
|
692 |
94%|ββββββββββ| 10038/10682 [1:36:43<05:22, 2.00it/s]
|
693 |
94%|ββββββββββ| 10039/10682 [1:36:43<05:22, 2.00it/s]
|
694 |
94%|ββββββββββ| 10040/10682 [1:36:44<05:21, 2.00it/s]
|
695 |
94%|ββββββββββ| 10041/10682 [1:36:44<05:20, 2.00it/s]
|
696 |
94%|ββββββββββ| 10042/10682 [1:36:45<05:20, 2.00it/s]
|
697 |
94%|ββββββββββ| 10043/10682 [1:36:45<05:19, 2.00it/s]
|
698 |
94%|ββββββββββ| 10044/10682 [1:36:46<05:19, 2.00it/s]
|
699 |
94%|ββββββββββ| 10045/10682 [1:36:46<05:18, 2.00it/s]
|
700 |
94%|ββββββββββ| 10046/10682 [1:36:47<05:18, 2.00it/s]
|
701 |
94%|ββββββββββ| 10047/10682 [1:36:47<05:17, 2.00it/s]
|
702 |
94%|ββββββββββ| 10048/10682 [1:36:48<05:17, 2.00it/s]
|
703 |
94%|ββββββββββ| 10049/10682 [1:36:48<05:16, 2.00it/s]
|
704 |
94%|ββββββββββ| 10050/10682 [1:36:49<05:16, 2.00it/s]
|
705 |
|
706 |
+
|
707 |
94%|ββββββββββ| 10050/10682 [1:36:49<05:16, 2.00it/s]
|
708 |
94%|ββββββββββ| 10051/10682 [1:36:49<05:16, 2.00it/s]
|
709 |
94%|ββββββββββ| 10052/10682 [1:36:50<05:15, 2.00it/s]
|
710 |
94%|ββββββββββ| 10053/10682 [1:36:50<05:15, 2.00it/s]
|
711 |
94%|ββββββββββ| 10054/10682 [1:36:51<05:14, 1.99it/s]
|
712 |
94%|ββββββββββ| 10055/10682 [1:36:51<05:14, 1.99it/s]
|
713 |
94%|ββββββββββ| 10056/10682 [1:36:52<05:13, 2.00it/s]
|
714 |
94%|ββββββββββ| 10057/10682 [1:36:52<05:12, 2.00it/s]
|
715 |
94%|ββββββββββ| 10058/10682 [1:36:53<05:13, 1.99it/s]
|
716 |
94%|ββββββββββ| 10059/10682 [1:36:53<05:12, 1.99it/s]
|
717 |
94%|ββββββββββ| 10060/10682 [1:36:54<05:11, 1.99it/s]
|
718 |
94%|ββββββββββ| 10061/10682 [1:36:54<05:10, 2.00it/s]
|
719 |
94%|ββββββββββ| 10062/10682 [1:36:55<05:10, 2.00it/s]
|
720 |
94%|ββββββββββ| 10063/10682 [1:36:55<05:10, 2.00it/s]
|
721 |
94%|ββββββββββ| 10064/10682 [1:36:56<05:10, 1.99it/s]
|
722 |
94%|ββββββββββ| 10065/10682 [1:36:56<05:09, 1.99it/s]
|
723 |
94%|ββββββββββ| 10066/10682 [1:36:57<05:09, 1.99it/s]
|
724 |
94%|ββββββββββ| 10067/10682 [1:36:57<05:08, 1.99it/s]
|
725 |
94%|ββββββββββ| 10068/10682 [1:36:58<05:07, 1.99it/s]
|
726 |
94%|ββββββββββ| 10069/10682 [1:36:58<05:07, 1.99it/s]
|
727 |
94%|ββββββββββ| 10070/10682 [1:36:59<05:06, 1.99it/s]
|
728 |
94%|ββββββββββ| 10071/10682 [1:36:59<05:06, 1.99it/s]
|
729 |
94%|ββββββββββ| 10072/10682 [1:37:00<05:05, 1.99it/s]
|
730 |
94%|ββββββββββ| 10073/10682 [1:37:00<05:05, 2.00it/s]
|
731 |
94%|ββββββββββ| 10074/10682 [1:37:01<05:04, 2.00it/s]
|
732 |
94%|ββββββββββ| 10075/10682 [1:37:01<05:04, 1.99it/s]
|
733 |
|
734 |
+
|
735 |
94%|ββββββββββ| 10075/10682 [1:37:01<05:04, 1.99it/s]
|
736 |
94%|ββββββββββ| 10076/10682 [1:37:02<05:04, 1.99it/s]
|
737 |
94%|ββββββββββ| 10077/10682 [1:37:02<05:03, 1.99it/s]
|
738 |
94%|ββββββββββ| 10078/10682 [1:37:03<05:02, 1.99it/s]
|
739 |
94%|ββββββββββ| 10079/10682 [1:37:03<05:02, 2.00it/s]
|
740 |
94%|ββββββββββ| 10080/10682 [1:37:04<05:01, 1.99it/s]
|
741 |
94%|ββββββββββ| 10081/10682 [1:37:04<05:01, 1.99it/s]
|
742 |
94%|ββββββββββ| 10082/10682 [1:37:05<05:01, 1.99it/s]
|
743 |
94%|ββββββββββ| 10083/10682 [1:37:05<05:00, 1.99it/s]
|
744 |
94%|ββββββββββ| 10084/10682 [1:37:06<04:59, 1.99it/s]
|
745 |
94%|ββββββββββ| 10085/10682 [1:37:06<04:59, 2.00it/s]
|
746 |
94%|ββββββββββ| 10086/10682 [1:37:07<04:58, 1.99it/s]
|
747 |
94%|ββββββββββ| 10087/10682 [1:37:07<04:58, 1.99it/s]
|
748 |
94%|ββββββββββ| 10088/10682 [1:37:08<04:57, 1.99it/s]
|
749 |
94%|ββββββββββ| 10089/10682 [1:37:08<04:57, 2.00it/s]
|
750 |
94%|ββββββββββ| 10090/10682 [1:37:09<04:56, 2.00it/s]
|
751 |
94%|ββββββββββ| 10091/10682 [1:37:09<04:56, 2.00it/s]
|
752 |
94%|ββββββββββ| 10092/10682 [1:37:10<04:55, 2.00it/s]
|
753 |
94%|ββββββββββ| 10093/10682 [1:37:10<04:54, 2.00it/s]
|
754 |
94%|ββββββββββ| 10094/10682 [1:37:11<04:54, 2.00it/s]
|
755 |
95%|ββββββββββ| 10095/10682 [1:37:11<04:54, 1.99it/s]
|
756 |
95%|ββββββββββ| 10096/10682 [1:37:12<04:53, 2.00it/s]
|
757 |
95%|ββββββββββ| 10097/10682 [1:37:12<04:52, 2.00it/s]
|
758 |
95%|ββββββββββ| 10098/10682 [1:37:13<04:52, 2.00it/s]
|
759 |
95%|ββββββββββ| 10099/10682 [1:37:13<04:51, 2.00it/s]
|
760 |
95%|ββββββββββ| 10100/10682 [1:37:14<04:51, 2.00it/s]
|
761 |
|
762 |
+
|
763 |
95%|ββββββββββ| 10100/10682 [1:37:14<04:51, 2.00it/s]
|
764 |
95%|ββββββββββ| 10101/10682 [1:37:15<05:15, 1.84it/s]
|
765 |
95%|ββββββββββ| 10102/10682 [1:37:15<05:08, 1.88it/s]
|
766 |
95%|ββββββββββ| 10103/10682 [1:37:16<05:02, 1.92it/s]
|
767 |
95%|ββββββββββ| 10104/10682 [1:37:16<04:57, 1.94it/s]
|
768 |
95%|ββββββββββ| 10105/10682 [1:37:17<04:55, 1.96it/s]
|
769 |
95%|ββββββββββ| 10106/10682 [1:37:17<04:53, 1.97it/s]
|
770 |
95%|ββββββββββ| 10107/10682 [1:37:18<04:51, 1.97it/s]
|
771 |
95%|ββββββββββ| 10108/10682 [1:37:18<04:49, 1.98it/s]
|
772 |
95%|ββββββββββ| 10109/10682 [1:37:19<04:48, 1.99it/s]
|
773 |
95%|ββββββββββ| 10110/10682 [1:37:19<04:47, 1.99it/s]
|
774 |
95%|ββββββββββ| 10111/10682 [1:37:20<04:46, 1.99it/s]
|
775 |
95%|ββββββββββ| 10112/10682 [1:37:20<04:45, 1.99it/s]
|
776 |
95%|ββββββββββ| 10113/10682 [1:37:21<04:45, 1.99it/s]
|
777 |
95%|ββββββββββ| 10114/10682 [1:37:21<04:44, 2.00it/s]
|
778 |
95%|ββββββββββ| 10115/10682 [1:37:22<04:44, 2.00it/s]
|
779 |
95%|ββββββββββ| 10116/10682 [1:37:22<04:43, 1.99it/s]
|
780 |
95%|ββββββββββ| 10117/10682 [1:37:23<04:43, 1.99it/s]
|
781 |
95%|ββββββββββ| 10118/10682 [1:37:23<04:42, 2.00it/s]
|
782 |
95%|ββββββββββ| 10119/10682 [1:37:24<04:42, 2.00it/s]
|
783 |
95%|ββββββββββ| 10120/10682 [1:37:24<04:41, 2.00it/s]
|
784 |
95%|ββββββββββ| 10121/10682 [1:37:25<04:41, 1.99it/s]
|
785 |
95%|ββββββββββ| 10122/10682 [1:37:25<04:41, 1.99it/s]
|
786 |
95%|ββββββββββ| 10123/10682 [1:37:26<04:40, 1.99it/s]
|
787 |
95%|ββββββββββ| 10124/10682 [1:37:26<04:39, 2.00it/s]
|
788 |
95%|ββββββββββ| 10125/10682 [1:37:27<04:39, 2.00it/s]{'loss': 3.1083, 'grad_norm': 0.24928326904773712, 'learning_rate': 8.261001828055447e-06, 'epoch': 13.27}
|
789 |
|
790 |
+
|
791 |
95%|ββββββββββ| 10125/10682 [1:37:27<04:39, 2.00it/s]
|
792 |
95%|ββββββββββ| 10126/10682 [1:37:27<04:38, 1.99it/s]
|
793 |
95%|ββββββββββ| 10127/10682 [1:37:28<04:38, 1.99it/s]
|
794 |
95%|ββββββββββ| 10128/10682 [1:37:28<04:37, 2.00it/s]
|
795 |
95%|ββββββββββ| 10129/10682 [1:37:29<04:36, 2.00it/s]
|
796 |
95%|ββββββββββ| 10130/10682 [1:37:29<04:36, 2.00it/s]
|
797 |
95%|ββββββββββ| 10131/10682 [1:37:30<04:36, 1.99it/s]
|
798 |
95%|ββββββββββ| 10132/10682 [1:37:30<04:36, 1.99it/s]
|
799 |
95%|ββββββββββ| 10133/10682 [1:37:31<04:35, 1.99it/s]
|
800 |
95%|ββββββββββ| 10134/10682 [1:37:31<04:34, 1.99it/s]
|
801 |
95%|ββββββββββ| 10135/10682 [1:37:32<04:33, 2.00it/s]
|
802 |
95%|ββββββββββ| 10136/10682 [1:37:32<04:33, 2.00it/s]
|
803 |
95%|ββββββββββ| 10137/10682 [1:37:33<04:32, 2.00it/s]
|
804 |
95%|ββββββββββ| 10138/10682 [1:37:33<04:32, 1.99it/s]
|
805 |
95%|ββββββββββ| 10139/10682 [1:37:34<04:32, 1.99it/s]
|
806 |
95%|ββββββββββ| 10140/10682 [1:37:34<04:32, 1.99it/s]
|
807 |
95%|ββββββββββ| 10141/10682 [1:37:35<04:31, 2.00it/s]
|
808 |
95%|ββββββββββ| 10142/10682 [1:37:35<04:30, 2.00it/s]
|
809 |
95%|ββββββββββ| 10143/10682 [1:37:36<04:30, 1.99it/s]
|
810 |
95%|ββββββββββ| 10144/10682 [1:37:36<04:29, 1.99it/s]
|
811 |
95%|ββββββββββ| 10145/10682 [1:37:37<04:29, 1.99it/s]
|
812 |
95%|ββββββββββ| 10146/10682 [1:37:37<04:28, 1.99it/s]
|
813 |
95%|ββββββββββ| 10147/10682 [1:37:38<04:27, 2.00it/s]
|
814 |
95%|ββββββββββ| 10148/10682 [1:37:38<04:27, 2.00it/s]
|
815 |
95%|ββββββββββ| 10149/10682 [1:37:39<04:27, 1.99it/s]
|
816 |
95%|ββββββββββ| 10150/10682 [1:37:39<04:27, 1.99it/s]{'loss': 3.1143, 'grad_norm': 0.2494010180234909, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.3}
|
817 |
+
|
818 |
|
819 |
95%|ββββββββββ| 10150/10682 [1:37:39<04:27, 1.99it/s]
|
820 |
95%|ββββββββββ| 10151/10682 [1:37:40<04:26, 1.99it/s]
|
821 |
95%|ββββββββββ| 10152/10682 [1:37:40<04:25, 1.99it/s]
|
822 |
95%|ββββββββββ| 10153/10682 [1:37:41<04:25, 1.99it/s]
|
823 |
95%|ββββββββββ| 10154/10682 [1:37:41<04:24, 1.99it/s]
|
824 |
95%|ββββββββββ| 10155/10682 [1:37:42<04:24, 1.99it/s]
|
825 |
95%|ββββββββββ| 10156/10682 [1:37:42<04:23, 1.99it/s]
|
826 |
95%|ββββββββββ| 10157/10682 [1:37:43<04:23, 1.99it/s]
|
827 |
95%|ββββββββββ| 10158/10682 [1:37:43<04:22, 2.00it/s]
|
828 |
95%|ββββββββββ| 10159/10682 [1:37:44<04:22, 2.00it/s]
|
829 |
95%|ββββββββββ| 10160/10682 [1:37:44<04:21, 2.00it/s]
|
830 |
95%|ββββββββββ| 10161/10682 [1:37:45<04:20, 2.00it/s]
|
831 |
95%|ββββββββββ| 10162/10682 [1:37:45<04:20, 2.00it/s]
|
832 |
95%|ββββββββββ| 10163/10682 [1:37:46<04:19, 2.00it/s]
|
833 |
95%|ββββββββββ| 10164/10682 [1:37:46<04:19, 2.00it/s]
|
834 |
95%|ββββββββββ| 10165/10682 [1:37:47<04:19, 1.99it/s]
|
835 |
95%|ββββββββββ| 10166/10682 [1:37:47<04:18, 2.00it/s]
|
836 |
95%|ββββββββββ| 10167/10682 [1:37:48<04:17, 2.00it/s]
|
837 |
95%|ββββββββββ| 10168/10682 [1:37:48<04:17, 2.00it/s]
|
838 |
95%|ββββββββββ| 10169/10682 [1:37:49<04:16, 2.00it/s]
|
839 |
95%|ββββββββββ| 10170/10682 [1:37:49<04:16, 2.00it/s]
|
840 |
95%|ββββββββββ| 10171/10682 [1:37:50<04:15, 2.00it/s]
|
841 |
95%|ββββββββββ| 10172/10682 [1:37:50<04:15, 2.00it/s]
|
842 |
95%|ββββββββββ| 10173/10682 [1:37:51<04:15, 1.99it/s]
|
843 |
95%|ββββββββββ| 10174/10682 [1:37:51<04:15, 1.99it/s]
|
844 |
95%|ββββββββββ| 10175/10682 [1:37:52<04:15, 1.99it/s]{'loss': 3.1081, 'grad_norm': 0.24675361812114716, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.34}
|
845 |
+
|
846 |
|
847 |
95%|ββββββββββ| 10175/10682 [1:37:52<04:15, 1.99it/s]
|
848 |
95%|ββββββββββ| 10176/10682 [1:37:52<04:14, 1.99it/s]
|
849 |
95%|ββββββββββ| 10177/10682 [1:37:53<04:13, 1.99it/s]
|
850 |
95%|ββββββββββ| 10178/10682 [1:37:53<04:12, 1.99it/s]
|
851 |
95%|ββββββββββ| 10179/10682 [1:37:54<04:12, 1.99it/s]
|
852 |
95%|ββββββββββ| 10180/10682 [1:37:54<04:12, 1.99it/s]
|
853 |
95%|ββββββββββ| 10181/10682 [1:37:55<04:11, 1.99it/s]
|
854 |
95%|ββββββββββ| 10182/10682 [1:37:55<04:10, 1.99it/s]
|
855 |
95%|ββββββββββ| 10183/10682 [1:37:56<04:10, 1.99it/s]
|
856 |
95%|ββββββββββ| 10184/10682 [1:37:56<04:09, 1.99it/s]
|
857 |
95%|ββββββββββ| 10185/10682 [1:37:57<04:09, 1.99it/s]
|
858 |
95%|ββββββββββ| 10186/10682 [1:37:57<04:08, 2.00it/s]
|
859 |
95%|ββββββββββ| 10187/10682 [1:37:58<04:08, 2.00it/s]
|
860 |
95%|ββββββββββ| 10188/10682 [1:37:58<04:07, 1.99it/s]
|
861 |
95%|ββββββββββ| 10189/10682 [1:37:59<04:07, 1.99it/s]
|
862 |
95%|ββββββββββ| 10190/10682 [1:37:59<04:06, 1.99it/s]
|
863 |
95%|ββββββββββ| 10191/10682 [1:38:00<04:06, 1.99it/s]
|
864 |
95%|ββββββββββ| 10192/10682 [1:38:00<04:05, 1.99it/s]
|
865 |
95%|ββββββββββ| 10193/10682 [1:38:01<04:05, 2.00it/s]
|
866 |
95%|ββββββββββ| 10194/10682 [1:38:01<04:04, 1.99it/s]
|
867 |
95%|ββββββββββ| 10195/10682 [1:38:02<04:04, 1.99it/s]
|
868 |
95%|ββββββββββ| 10196/10682 [1:38:02<04:03, 1.99it/s]
|
869 |
95%|ββββββββββ| 10197/10682 [1:38:03<04:03, 1.99it/s]
|
870 |
95%|ββββββββββ| 10198/10682 [1:38:03<04:02, 2.00it/s]
|
871 |
95%|ββββββββββ| 10199/10682 [1:38:04<04:02, 1.99it/s]
|
872 |
95%|ββββββββββ| 10200/10682 [1:38:04<04:01, 1.99it/s]{'loss': 3.1186, 'grad_norm': 0.24594959616661072, 'learning_rate': 6.190386348572108e-06, 'epoch': 13.37}
|
873 |
|
874 |
+
|
875 |
95%|ββββββββββ| 10200/10682 [1:38:04<04:01, 1.99it/s]
|
876 |
95%|ββββββββββ| 10201/10682 [1:38:05<04:01, 1.99it/s]
|
877 |
96%|ββββββββββ| 10202/10682 [1:38:05<04:00, 1.99it/s]
|
878 |
96%|ββββββββββ| 10203/10682 [1:38:06<03:59, 2.00it/s]
|
879 |
96%|ββββββββββ| 10204/10682 [1:38:06<03:59, 2.00it/s]
|
880 |
96%|ββββββββββ| 10205/10682 [1:38:07<03:58, 2.00it/s]
|
881 |
96%|ββββββββββ| 10206/10682 [1:38:07<03:58, 1.99it/s]
|
882 |
96%|ββββββββββ| 10207/10682 [1:38:08<03:58, 1.99it/s]
|
883 |
96%|ββββββββββ| 10208/10682 [1:38:08<03:57, 2.00it/s]
|
884 |
96%|ββββββββββ| 10209/10682 [1:38:09<03:56, 2.00it/s]
|
885 |
96%|ββββββββββ| 10210/10682 [1:38:09<03:56, 2.00it/s]
|
886 |
96%|ββββββββββ| 10211/10682 [1:38:10<04:16, 1.83it/s]
|
887 |
96%|ββββββββββ| 10212/10682 [1:38:10<04:10, 1.88it/s]
|
888 |
96%|ββββββββββ| 10213/10682 [1:38:11<04:05, 1.91it/s]
|
889 |
96%|ββββββββββ| 10214/10682 [1:38:11<04:01, 1.94it/s]
|
890 |
96%|ββββββββββ| 10215/10682 [1:38:12<03:58, 1.96it/s]
|
891 |
96%|ββββββββββ| 10216/10682 [1:38:12<03:57, 1.97it/s]
|
892 |
96%|ββββββββββ| 10217/10682 [1:38:13<03:55, 1.97it/s]
|
893 |
96%|ββββββββββ| 10218/10682 [1:38:13<03:54, 1.98it/s]
|
894 |
96%|ββββββββββ| 10219/10682 [1:38:14<03:53, 1.98it/s]
|
895 |
96%|ββββββββββ| 10220/10682 [1:38:14<03:52, 1.99it/s]
|
896 |
96%|ββββββββββ| 10221/10682 [1:38:15<03:51, 1.99it/s]
|
897 |
96%|ββββββββββ| 10222/10682 [1:38:15<03:51, 1.99it/s]
|
898 |
96%|ββββββββββ| 10223/10682 [1:38:16<03:50, 1.99it/s]
|
899 |
96%|ββββββββββ| 10224/10682 [1:38:16<03:50, 1.99it/s]
|
900 |
96%|ββββββββββ| 10225/10682 [1:38:17<03:49, 1.99it/s]
|
901 |
{'loss': 3.1084, 'grad_norm': 0.24636626243591309, 'learning_rate': 5.56604678228706e-06, 'epoch': 13.4}
|
902 |
+
|
903 |
96%|ββββββββββ| 10225/10682 [1:38:17<03:49, 1.99it/s]
|
904 |
96%|ββββββββββ| 10226/10682 [1:38:17<03:48, 1.99it/s]
|
905 |
96%|ββββββββββ| 10227/10682 [1:38:18<03:48, 1.99it/s]
|
906 |
96%|ββββββββββ| 10228/10682 [1:38:18<03:47, 1.99it/s]
|
907 |
96%|ββββββββββ| 10229/10682 [1:38:19<03:46, 2.00it/s]
|
908 |
96%|ββββββββββ| 10230/10682 [1:38:19<03:46, 1.99it/s]
|
909 |
96%|ββββββββββ| 10231/10682 [1:38:20<03:46, 1.99it/s]
|
910 |
96%|ββββββββββ| 10232/10682 [1:38:20<03:46, 1.99it/s]
|
911 |
96%|ββββββββββ| 10233/10682 [1:38:21<03:45, 1.99it/s]
|
912 |
96%|ββββββββββ| 10234/10682 [1:38:21<03:44, 1.99it/s]
|
913 |
96%|ββββββββββ| 10235/10682 [1:38:22<03:44, 1.99it/s]
|
914 |
96%|ββββββββββ| 10236/10682 [1:38:22<03:43, 1.99it/s]
|
915 |
96%|ββββββββββ| 10237/10682 [1:38:23<03:43, 1.99it/s]
|
916 |
96%|ββββββββββ| 10238/10682 [1:38:23<03:42, 1.99it/s]
|
917 |
96%|ββββββββββ| 10239/10682 [1:38:24<03:43, 1.98it/s]
|
918 |
96%|ββββββββββ| 10240/10682 [1:38:24<03:42, 1.99it/s]
|
919 |
96%|ββββββββββ| 10241/10682 [1:38:25<03:41, 1.99it/s]
|
920 |
96%|ββββββββββ| 10242/10682 [1:38:25<03:41, 1.99it/s]
|
921 |
96%|ββββββββββ| 10243/10682 [1:38:26<03:40, 1.99it/s]
|
922 |
96%|ββββββββββ| 10244/10682 [1:38:26<03:39, 1.99it/s]
|
923 |
96%|ββββββββββ| 10245/10682 [1:38:27<03:39, 1.99it/s]
|
924 |
96%|ββββββββββ| 10246/10682 [1:38:27<03:38, 1.99it/s]
|
925 |
96%|ββββββββββ| 10247/10682 [1:38:28<03:38, 1.99it/s]
|
926 |
96%|ββββββββββ| 10248/10682 [1:38:28<03:37, 1.99it/s]
|
927 |
96%|ββββββββββ| 10249/10682 [1:38:29<03:37, 1.99it/s]
|
928 |
96%|ββββββββββ| 10250/10682 [1:38:29<03:36, 2.00it/s]
|
929 |
{'loss': 3.1113, 'grad_norm': 0.24913303554058075, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
930 |
+
|
931 |
96%|ββββββββββ| 10250/10682 [1:38:29<03:36, 2.00it/s]
|
932 |
96%|ββββββββββ| 10251/10682 [1:38:30<03:36, 1.99it/s]
|
933 |
96%|ββββββββββ| 10252/10682 [1:38:30<03:35, 1.99it/s]
|
934 |
96%|ββββββββββ| 10253/10682 [1:38:31<03:35, 1.99it/s]
|
935 |
96%|ββββββββββ| 10254/10682 [1:38:31<03:34, 1.99it/s]
|
936 |
96%|ββββββββββ| 10255/10682 [1:38:32<03:34, 1.99it/s]
|
937 |
96%|ββββββββββ| 10256/10682 [1:38:32<03:33, 1.99it/s]
|
938 |
96%|ββββββββββ| 10257/10682 [1:38:33<03:33, 1.99it/s]
|
939 |
96%|ββββββββββ| 10258/10682 [1:38:33<03:32, 1.99it/s]
|
940 |
96%|ββββββββββ| 10259/10682 [1:38:34<03:32, 1.99it/s]
|
941 |
96%|ββββββββββ| 10260/10682 [1:38:34<03:31, 1.99it/s]
|
942 |
96%|ββββββββββ| 10261/10682 [1:38:35<03:31, 1.99it/s]
|
943 |
96%|ββββββββββ| 10262/10682 [1:38:35<03:30, 1.99it/s]
|
944 |
96%|ββββββββββ| 10263/10682 [1:38:36<03:30, 1.99it/s]
|
945 |
96%|ββββββββββ| 10264/10682 [1:38:36<03:29, 1.99it/s]
|
946 |
96%|ββββββββββ| 10265/10682 [1:38:37<03:29, 1.99it/s]
|
947 |
96%|ββββββββββ| 10266/10682 [1:38:37<03:29, 1.99it/s]
|
948 |
96%|ββββββββββ| 10267/10682 [1:38:38<03:28, 1.99it/s]
|
949 |
96%|ββββββββββ| 10268/10682 [1:38:38<03:28, 1.99it/s]
|
950 |
96%|ββββββββββ| 10269/10682 [1:38:39<03:27, 1.99it/s]
|
951 |
96%|ββββββββββ| 10270/10682 [1:38:39<03:26, 1.99it/s]
|
952 |
96%|ββββββββββ| 10271/10682 [1:38:40<03:26, 1.99it/s]
|
953 |
96%|ββββββββββ| 10272/10682 [1:38:40<03:25, 1.99it/s]
|
954 |
96%|ββββββββββ| 10273/10682 [1:38:41<03:25, 1.99it/s]
|
955 |
96%|ββββββββββ| 10274/10682 [1:38:41<03:24, 1.99it/s]
|
956 |
96%|ββββββββββ| 10275/10682 [1:38:42<03:24, 1.99it/s]
|
957 |
|
958 |
+
|
959 |
96%|ββββββββββ| 10275/10682 [1:38:42<03:24, 1.99it/s]
|
960 |
96%|ββββββββββ| 10276/10682 [1:38:42<03:23, 1.99it/s]
|
961 |
96%|ββββββββββ| 10277/10682 [1:38:43<03:23, 1.99it/s]
|
962 |
96%|ββββββββββ| 10278/10682 [1:38:43<03:22, 1.99it/s]
|
963 |
96%|ββββββββββ| 10279/10682 [1:38:44<03:22, 1.99it/s]
|
964 |
96%|ββββββββββ| 10280/10682 [1:38:44<03:21, 1.99it/s]
|
965 |
96%|ββββββββββ| 10281/10682 [1:38:45<03:21, 1.99it/s]
|
966 |
96%|ββββββββββ| 10282/10682 [1:38:45<03:20, 1.99it/s]
|
967 |
96%|ββββββββββ| 10283/10682 [1:38:46<03:20, 1.99it/s]
|
968 |
96%|ββββββββββ| 10284/10682 [1:38:46<03:19, 1.99it/s]
|
969 |
96%|ββββββββββ| 10285/10682 [1:38:47<03:19, 1.99it/s]
|
970 |
96%|ββββββββββ| 10286/10682 [1:38:47<03:18, 1.99it/s]
|
971 |
96%|ββββββββββ| 10287/10682 [1:38:48<03:18, 1.99it/s]
|
972 |
96%|ββββββββββ| 10288/10682 [1:38:48<03:17, 1.99it/s]
|
973 |
96%|ββββββββββ| 10289/10682 [1:38:49<03:17, 1.99it/s]
|
974 |
96%|ββββββββββ| 10290/10682 [1:38:50<03:17, 1.99it/s]
|
975 |
96%|ββββββββββ| 10291/10682 [1:38:50<03:16, 1.99it/s]
|
976 |
96%|ββββββββββ| 10292/10682 [1:38:51<03:15, 1.99it/s]
|
977 |
96%|ββββββββββ| 10293/10682 [1:38:51<03:15, 1.99it/s]
|
978 |
96%|ββββββββββ| 10294/10682 [1:38:52<03:14, 1.99it/s]
|
979 |
96%|ββββββββββ| 10295/10682 [1:38:52<03:14, 1.99it/s]
|
980 |
96%|ββββββββββ| 10296/10682 [1:38:53<03:13, 1.99it/s]
|
981 |
96%|ββββββββββ| 10297/10682 [1:38:53<03:13, 1.99it/s]
|
982 |
96%|ββββββββββ| 10298/10682 [1:38:54<03:12, 1.99it/s]
|
983 |
96%|ββββββββββ| 10299/10682 [1:38:54<03:12, 1.99it/s]
|
984 |
96%|ββββββββββ| 10300/10682 [1:38:55<03:11, 1.99it/s]{'loss': 3.111, 'grad_norm': 0.2491437792778015, 'learning_rate': 3.891208300917604e-06, 'epoch': 13.5}
|
985 |
+
|
986 |
|
987 |
96%|ββββββββββ| 10300/10682 [1:38:55<03:11, 1.99it/s]
|
988 |
96%|ββββββββββ| 10301/10682 [1:38:55<03:11, 1.99it/s]
|
989 |
96%|ββββββββββ| 10302/10682 [1:38:56<03:10, 1.99it/s]
|
990 |
96%|ββββββββββ| 10303/10682 [1:38:56<03:10, 1.99it/s]
|
991 |
96%|ββββββββββ| 10304/10682 [1:38:57<03:10, 1.99it/s]
|
992 |
96%|ββββββββββ| 10305/10682 [1:38:57<03:09, 1.99it/s]
|
993 |
96%|ββββββββββ| 10306/10682 [1:38:58<03:09, 1.99it/s]
|
994 |
96%|ββββββββββ| 10307/10682 [1:38:58<03:08, 1.99it/s]
|
995 |
96%|ββββββββββ| 10308/10682 [1:38:59<03:07, 1.99it/s]
|
996 |
97%|ββββββββββ| 10309/10682 [1:38:59<03:07, 1.99it/s]
|
997 |
97%|ββββββββββ| 10310/10682 [1:39:00<03:06, 1.99it/s]
|
998 |
97%|ββββββββββ| 10311/10682 [1:39:00<03:06, 1.99it/s]
|
999 |
97%|ββββββββββ| 10312/10682 [1:39:01<03:05, 1.99it/s]
|
1000 |
97%|ββββββββββ| 10313/10682 [1:39:01<03:05, 1.99it/s]
|
1001 |
97%|ββββββββββ| 10314/10682 [1:39:02<03:04, 1.99it/s]
|
1002 |
97%|ββββββββββ| 10315/10682 [1:39:02<03:04, 1.99it/s]
|
1003 |
97%|ββββββββββ| 10316/10682 [1:39:03<03:03, 1.99it/s]
|
1004 |
97%|ββββββββββ| 10317/10682 [1:39:03<03:03, 1.99it/s]
|
1005 |
97%|ββββββββββ| 10318/10682 [1:39:04<03:02, 1.99it/s]
|
1006 |
97%|ββββββββββ| 10319/10682 [1:39:04<03:02, 1.99it/s]
|
1007 |
97%|ββββββββββ| 10320/10682 [1:39:05<03:01, 1.99it/s]
|
1008 |
97%|ββββββββββ| 10321/10682 [1:39:05<03:01, 1.99it/s]
|
1009 |
97%|ββββββββββ| 10322/10682 [1:39:06<03:00, 1.99it/s]
|
1010 |
97%|ββββββββββ| 10323/10682 [1:39:06<02:59, 1.99it/s]
|
1011 |
97%|ββββββββββ| 10324/10682 [1:39:07<02:59, 1.99it/s]
|
1012 |
97%|ββββββββββ| 10325/10682 [1:39:07<02:59, 1.99it/s]{'loss': 3.1165, 'grad_norm': 0.24889500439167023, 'learning_rate': 3.3991130994299734e-06, 'epoch': 13.53}
|
1013 |
+
|
1014 |
|
1015 |
97%|ββββββββββ| 10325/10682 [1:39:07<02:59, 1.99it/s]
|
1016 |
97%|ββββββββββ| 10326/10682 [1:39:08<02:58, 1.99it/s]
|
1017 |
97%|ββββββββββ| 10327/10682 [1:39:08<02:58, 1.99it/s]
|
1018 |
97%|ββββββββββ| 10328/10682 [1:39:09<02:57, 1.99it/s]
|
1019 |
97%|ββββββββββ| 10329/10682 [1:39:09<02:57, 1.99it/s]
|
1020 |
97%|ββββββββββ| 10330/10682 [1:39:10<02:56, 1.99it/s]
|
1021 |
97%|ββββββββββ| 10331/10682 [1:39:10<02:56, 1.99it/s]
|
1022 |
97%|ββββββββββ| 10332/10682 [1:39:11<02:55, 1.99it/s]
|
1023 |
97%|ββββββββββ| 10333/10682 [1:39:11<02:55, 1.99it/s]
|
1024 |
97%|ββββββββββ| 10334/10682 [1:39:12<02:54, 1.99it/s]
|
1025 |
97%|ββββββββββ| 10335/10682 [1:39:12<02:54, 1.99it/s]
|
1026 |
97%|ββββββββββ| 10336/10682 [1:39:13<02:53, 1.99it/s]
|
1027 |
97%|ββββββββββ| 10337/10682 [1:39:13<02:53, 1.99it/s]
|
1028 |
97%|ββββββββββ| 10338/10682 [1:39:14<02:52, 1.99it/s]
|
1029 |
97%|ββββββββββ| 10339/10682 [1:39:14<02:52, 1.99it/s]
|
1030 |
97%|ββββββββββ| 10340/10682 [1:39:15<02:51, 1.99it/s]
|
1031 |
97%|ββββββββββ| 10341/10682 [1:39:15<02:51, 1.99it/s]
|
1032 |
97%|ββββββββββ| 10342/10682 [1:39:16<02:50, 1.99it/s]
|
1033 |
97%|ββββββββββ| 10343/10682 [1:39:16<02:50, 1.99it/s]
|
1034 |
97%|ββββββββββ| 10344/10682 [1:39:17<02:49, 1.99it/s]
|
1035 |
97%|ββββββββββ| 10345/10682 [1:39:17<02:49, 1.99it/s]
|
1036 |
97%|ββββββββββ| 10346/10682 [1:39:18<02:48, 1.99it/s]
|
1037 |
97%|ββββββββββ| 10347/10682 [1:39:18<02:48, 1.99it/s]
|
1038 |
97%|ββββββββββ| 10348/10682 [1:39:19<02:47, 1.99it/s]
|
1039 |
97%|ββββββββββ| 10349/10682 [1:39:19<02:46, 1.99it/s]
|
1040 |
97%|ββββββββββ| 10350/10682 [1:39:20<02:46, 1.99it/s]{'loss': 3.1195, 'grad_norm': 0.2491111308336258, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
1041 |
+
|
1042 |
|
1043 |
97%|ββββββββββ| 10350/10682 [1:39:20<02:46, 1.99it/s]
|
1044 |
97%|ββββββββββ| 10351/10682 [1:39:20<02:46, 1.99it/s]
|
1045 |
97%|ββββββββββ| 10352/10682 [1:39:21<02:45, 1.99it/s]
|
1046 |
97%|ββββββββββ| 10353/10682 [1:39:21<02:45, 1.99it/s]
|
1047 |
97%|ββββββββββ| 10354/10682 [1:39:22<02:44, 1.99it/s]
|
1048 |
97%|ββββββββββ| 10355/10682 [1:39:22<02:44, 1.99it/s]
|
1049 |
97%|ββββββββββ| 10356/10682 [1:39:23<02:43, 1.99it/s]
|
1050 |
97%|ββββββββββ| 10357/10682 [1:39:23<02:43, 1.99it/s]
|
1051 |
97%|ββββββββββ| 10358/10682 [1:39:24<02:42, 1.99it/s]
|
1052 |
97%|ββββββββββ| 10359/10682 [1:39:24<02:43, 1.98it/s]
|
1053 |
97%|ββββββββββ| 10360/10682 [1:39:25<02:42, 1.98it/s]
|
1054 |
97%|ββββββββββ| 10361/10682 [1:39:25<02:41, 1.98it/s]
|
1055 |
97%|ββββββββββ| 10362/10682 [1:39:26<02:40, 1.99it/s]
|
1056 |
97%|ββββββββββ| 10363/10682 [1:39:26<02:40, 1.99it/s]
|
1057 |
97%|ββββββββββ| 10364/10682 [1:39:27<02:39, 1.99it/s]
|
1058 |
97%|ββββββββββ| 10365/10682 [1:39:27<02:39, 1.99it/s]
|
1059 |
97%|ββββββββββ| 10366/10682 [1:39:28<02:38, 1.99it/s]
|
1060 |
97%|ββββββββββ| 10367/10682 [1:39:28<02:38, 1.99it/s]
|
1061 |
97%|ββββββββββ| 10368/10682 [1:39:29<02:37, 1.99it/s]
|
1062 |
97%|ββββββββββ| 10369/10682 [1:39:29<02:37, 1.99it/s]
|
1063 |
97%|ββββββββββ| 10370/10682 [1:39:30<02:36, 1.99it/s]
|
1064 |
97%|ββββββββββ| 10371/10682 [1:39:30<02:36, 1.99it/s]
|
1065 |
97%|ββββββββββ| 10372/10682 [1:39:31<02:35, 1.99it/s]
|
1066 |
97%|ββββββββββ| 10373/10682 [1:39:31<02:35, 1.99it/s]
|
1067 |
97%|ββββββββββ| 10374/10682 [1:39:32<02:34, 1.99it/s]
|
1068 |
97%|ββββββββββ| 10375/10682 [1:39:32<02:34, 1.99it/s]{'loss': 3.1152, 'grad_norm': 0.24760933220386505, 'learning_rate': 2.5143995351817882e-06, 'epoch': 13.6}
|
1069 |
|
1070 |
+
|
1071 |
97%|ββββββββββ| 10375/10682 [1:39:32<02:34, 1.99it/s]
|
1072 |
97%|ββββββββββ| 10376/10682 [1:39:33<02:33, 1.99it/s]
|
1073 |
97%|ββββββββββ| 10377/10682 [1:39:33<02:33, 1.99it/s]
|
1074 |
97%|ββββββββββ| 10378/10682 [1:39:34<02:32, 1.99it/s]
|
1075 |
97%|ββββββββββ| 10379/10682 [1:39:34<02:32, 1.99it/s]
|
1076 |
97%|ββββββββββ| 10380/10682 [1:39:35<02:31, 1.99it/s]
|
1077 |
97%|ββββββββββ| 10381/10682 [1:39:35<02:31, 1.99it/s]
|
1078 |
97%|ββββββββββ| 10382/10682 [1:39:36<02:30, 1.99it/s]
|
1079 |
97%|ββββββββββ| 10383/10682 [1:39:36<02:29, 2.00it/s]
|
1080 |
97%|ββββββββββ| 10384/10682 [1:39:37<02:29, 1.99it/s]
|
1081 |
97%|ββββββββββ| 10385/10682 [1:39:37<02:28, 1.99it/s]
|
1082 |
97%|ββββββββββ| 10386/10682 [1:39:38<02:28, 1.99it/s]
|
1083 |
97%|ββββββββββ| 10387/10682 [1:39:38<02:27, 1.99it/s]
|
1084 |
97%|ββββββββββ| 10388/10682 [1:39:39<02:27, 1.99it/s]
|
1085 |
97%|ββββββββββ| 10389/10682 [1:39:39<02:26, 1.99it/s]
|
1086 |
97%|ββββββββββ| 10390/10682 [1:39:40<02:26, 1.99it/s]
|
1087 |
97%|ββββββββββ| 10391/10682 [1:39:40<02:25, 1.99it/s]
|
1088 |
97%|ββββββββββ| 10392/10682 [1:39:41<02:25, 1.99it/s]
|
1089 |
97%|ββββββββββ| 10393/10682 [1:39:41<02:25, 1.99it/s]
|
1090 |
97%|ββββββββββ| 10394/10682 [1:39:42<02:24, 2.00it/s]
|
1091 |
97%|ββββββββββ| 10395/10682 [1:39:42<02:23, 1.99it/s]
|
1092 |
97%|ββββββββββ| 10396/10682 [1:39:43<02:23, 1.99it/s]
|
1093 |
97%|ββββββββββ| 10397/10682 [1:39:43<02:23, 1.99it/s]
|
1094 |
97%|ββββββββββ| 10398/10682 [1:39:44<02:22, 1.99it/s]
|
1095 |
97%|ββββββββββ| 10399/10682 [1:39:44<02:21, 1.99it/s]
|
1096 |
97%|ββββββββββ| 10400/10682 [1:39:45<02:21, 2.00it/s]{'loss': 3.1212, 'grad_norm': 0.24558085203170776, 'learning_rate': 2.1218402281655835e-06, 'epoch': 13.63}
|
1097 |
+
|
1098 |
|
1099 |
97%|ββββββββββ| 10400/10682 [1:39:45<02:21, 2.00it/s]
|
1100 |
97%|ββββββββββ| 10401/10682 [1:39:45<02:20, 1.99it/s]
|
1101 |
97%|ββββββββββ| 10402/10682 [1:39:46<02:20, 1.99it/s]
|
1102 |
97%|ββββββββββ| 10403/10682 [1:39:46<02:20, 1.99it/s]
|
1103 |
97%|ββββββββββ| 10404/10682 [1:39:47<02:19, 1.99it/s]
|
1104 |
97%|ββββββββββ| 10405/10682 [1:39:47<02:19, 1.99it/s]
|
1105 |
97%|ββββββββββ| 10406/10682 [1:39:48<02:18, 1.99it/s]
|
1106 |
97%|ββββββββββ| 10407/10682 [1:39:48<02:17, 1.99it/s]
|
1107 |
97%|ββββββββββ| 10408/10682 [1:39:49<02:17, 1.99it/s]
|
1108 |
97%|ββββββββββ| 10409/10682 [1:39:49<02:17, 1.99it/s]
|
1109 |
97%|ββββββββββ| 10410/10682 [1:39:50<02:16, 1.99it/s]
|
1110 |
97%|ββββββββββ| 10411/10682 [1:39:50<02:16, 1.99it/s]
|
1111 |
97%|ββββββββββ| 10412/10682 [1:39:51<02:15, 1.99it/s]
|
1112 |
97%|ββββββββββ| 10413/10682 [1:39:51<02:14, 1.99it/s]
|
1113 |
97%|ββββββββββ| 10414/10682 [1:39:52<02:14, 1.99it/s]
|
1114 |
98%|ββββββββββ| 10415/10682 [1:39:52<02:13, 1.99it/s]
|
1115 |
98%|ββββββββββ| 10416/10682 [1:39:53<02:13, 1.99it/s]
|
1116 |
98%|ββββββββββ| 10417/10682 [1:39:53<02:13, 1.99it/s]
|
1117 |
98%|ββββββββββ| 10418/10682 [1:39:54<02:12, 1.99it/s]
|
1118 |
98%|ββββββββββ| 10419/10682 [1:39:54<02:11, 1.99it/s]
|
1119 |
98%|ββββββββββ| 10420/10682 [1:39:55<02:11, 2.00it/s]
|
1120 |
98%|ββββββββββ| 10421/10682 [1:39:55<02:10, 1.99it/s]
|
1121 |
98%|ββββββββββ| 10422/10682 [1:39:56<02:10, 1.99it/s]
|
1122 |
98%|ββββββββββ| 10423/10682 [1:39:56<02:10, 1.99it/s]
|
1123 |
98%|ββββββββββ| 10424/10682 [1:39:57<02:09, 1.99it/s]
|
1124 |
98%|ββββββββββ| 10425/10682 [1:39:57<02:08, 1.99it/s]{'loss': 3.1095, 'grad_norm': 0.24991337954998016, 'learning_rate': 1.7625149152127318e-06, 'epoch': 13.66}
|
1125 |
|
1126 |
+
|
1127 |
98%|ββββββββββ| 10425/10682 [1:39:57<02:08, 1.99it/s]
|
1128 |
98%|ββββββββββ| 10426/10682 [1:39:58<02:08, 1.99it/s]
|
1129 |
98%|ββββββββββ| 10427/10682 [1:39:58<02:07, 1.99it/s]
|
1130 |
98%|ββββββββββ| 10428/10682 [1:39:59<02:07, 1.99it/s]
|
1131 |
98%|ββββββββββ| 10429/10682 [1:39:59<02:07, 1.99it/s]
|
1132 |
98%|ββββββββββ| 10430/10682 [1:40:00<02:06, 1.99it/s]
|
1133 |
98%|ββββββββββ| 10431/10682 [1:40:00<02:06, 1.99it/s]
|
1134 |
98%|ββββββββββ| 10432/10682 [1:40:01<02:05, 1.99it/s]
|
1135 |
98%|ββββββββββ| 10433/10682 [1:40:01<02:04, 1.99it/s]
|
1136 |
98%|ββββββββββ| 10434/10682 [1:40:02<02:04, 1.99it/s]
|
1137 |
98%|ββββββββββ| 10435/10682 [1:40:02<02:04, 1.99it/s]
|
1138 |
98%|ββββββββββ| 10436/10682 [1:40:03<02:03, 1.99it/s]
|
1139 |
98%|ββββββββββ| 10437/10682 [1:40:03<02:03, 1.99it/s]
|
1140 |
98%|ββββββββββ| 10438/10682 [1:40:04<02:02, 1.99it/s]
|
1141 |
98%|ββββββββββ| 10439/10682 [1:40:04<02:02, 1.99it/s]
|
1142 |
98%|ββββββββββ| 10440/10682 [1:40:05<02:01, 1.99it/s]
|
1143 |
98%|ββββββββββ| 10441/10682 [1:40:05<02:01, 1.99it/s]
|
1144 |
98%|ββββββββββ| 10442/10682 [1:40:06<02:00, 1.99it/s]
|
1145 |
98%|ββββββββββ| 10443/10682 [1:40:06<02:00, 1.99it/s]
|
1146 |
98%|ββββββββββ| 10444/10682 [1:40:07<01:59, 1.99it/s]
|
1147 |
98%|ββββββββββ| 10445/10682 [1:40:07<01:58, 1.99it/s]
|
1148 |
98%|ββββββββββ| 10446/10682 [1:40:08<01:58, 1.99it/s]
|
1149 |
98%|ββββββββββ| 10447/10682 [1:40:08<01:58, 1.99it/s]
|
1150 |
98%|ββββββββββ| 10448/10682 [1:40:09<01:57, 1.99it/s]
|
1151 |
98%|ββββββββββ| 10449/10682 [1:40:09<01:57, 1.99it/s]
|
1152 |
98%|ββββββββββ| 10450/10682 [1:40:10<01:56, 1.99it/s]
|
1153 |
|
1154 |
+
|
1155 |
98%|ββββββββββ| 10450/10682 [1:40:10<01:56, 1.99it/s]
|
1156 |
98%|ββββββββββ| 10451/10682 [1:40:10<01:56, 1.99it/s]
|
1157 |
98%|ββββββββββ| 10452/10682 [1:40:11<01:55, 1.99it/s]
|
1158 |
98%|ββββββββββ| 10453/10682 [1:40:11<01:54, 1.99it/s]
|
1159 |
98%|ββββββββββ| 10454/10682 [1:40:12<01:54, 1.99it/s]
|
1160 |
98%|ββββββββββ| 10455/10682 [1:40:12<01:54, 1.99it/s]
|
1161 |
98%|ββββββββββ| 10456/10682 [1:40:13<01:53, 1.99it/s]
|
1162 |
98%|ββββββββββ| 10457/10682 [1:40:13<01:52, 1.99it/s]
|
1163 |
98%|ββββββββββ| 10458/10682 [1:40:14<01:52, 1.99it/s]
|
1164 |
98%|ββββββββββ| 10459/10682 [1:40:14<01:51, 1.99it/s]
|
1165 |
98%|ββββββββββ| 10460/10682 [1:40:15<01:51, 1.99it/s]
|
1166 |
98%|ββββββββββ| 10461/10682 [1:40:15<01:50, 1.99it/s]
|
1167 |
98%|ββββββββββ| 10462/10682 [1:40:16<01:50, 2.00it/s]
|
1168 |
98%|ββββββββββ| 10463/10682 [1:40:16<01:49, 1.99it/s]
|
1169 |
98%|ββββββββββ| 10464/10682 [1:40:17<01:49, 1.99it/s]
|
1170 |
98%|ββββββββββ| 10465/10682 [1:40:17<01:48, 1.99it/s]
|
1171 |
98%|ββββββββββ| 10466/10682 [1:40:18<01:48, 1.99it/s]
|
1172 |
98%|ββββββββββ| 10467/10682 [1:40:18<01:48, 1.99it/s]
|
1173 |
98%|ββββββββββ| 10468/10682 [1:40:19<01:47, 1.99it/s]
|
1174 |
98%|ββββββββββ| 10469/10682 [1:40:19<01:46, 1.99it/s]
|
1175 |
98%|ββββββββββ| 10470/10682 [1:40:20<01:46, 1.99it/s]
|
1176 |
98%|ββββββββββ| 10471/10682 [1:40:20<01:45, 1.99it/s]
|
1177 |
98%|ββββββββββ| 10472/10682 [1:40:21<01:45, 1.99it/s]
|
1178 |
98%|ββββββββββ| 10473/10682 [1:40:21<01:44, 1.99it/s]
|
1179 |
98%|ββββββββββ| 10474/10682 [1:40:22<01:44, 1.99it/s]
|
1180 |
98%|ββββββββββ| 10475/10682 [1:40:22<01:43, 1.99it/s]
|
1181 |
|
1182 |
+
|
1183 |
98%|ββββββββββ| 10475/10682 [1:40:22<01:43, 1.99it/s]
|
1184 |
98%|ββββββββββ| 10476/10682 [1:40:23<01:43, 1.99it/s]
|
1185 |
98%|ββββββββββ| 10477/10682 [1:40:23<01:42, 1.99it/s]
|
1186 |
98%|ββββββββββ| 10478/10682 [1:40:24<01:42, 1.99it/s]
|
1187 |
98%|ββββββββββ| 10479/10682 [1:40:24<01:42, 1.99it/s]
|
1188 |
98%|ββββββββββ| 10480/10682 [1:40:25<01:41, 1.99it/s]
|
1189 |
98%|ββββββββββ| 10481/10682 [1:40:25<01:40, 1.99it/s]
|
1190 |
98%|ββββββββββ| 10482/10682 [1:40:26<01:40, 1.99it/s]
|
1191 |
98%|ββββββββββ| 10483/10682 [1:40:26<01:39, 1.99it/s]
|
1192 |
98%|ββββββββββ| 10484/10682 [1:40:27<01:39, 1.99it/s]
|
1193 |
98%|ββββββββββ| 10485/10682 [1:40:27<01:38, 1.99it/s]
|
1194 |
98%|ββββββββββ| 10486/10682 [1:40:28<01:38, 1.99it/s]
|
1195 |
98%|ββββββββββ| 10487/10682 [1:40:28<01:38, 1.99it/s]
|
1196 |
98%|ββββββββββ| 10488/10682 [1:40:29<01:37, 1.99it/s]
|
1197 |
98%|ββββββββββ| 10489/10682 [1:40:29<01:36, 1.99it/s]
|
1198 |
98%|ββββββββββ| 10490/10682 [1:40:30<01:36, 1.99it/s]
|
1199 |
98%|ββββββββββ| 10491/10682 [1:40:30<01:35, 1.99it/s]
|
1200 |
98%|ββββββββββ| 10492/10682 [1:40:31<01:35, 1.99it/s]
|
1201 |
98%|ββββββββββ| 10493/10682 [1:40:31<01:34, 1.99it/s]
|
1202 |
98%|ββββββββββ| 10494/10682 [1:40:32<01:34, 1.99it/s]
|
1203 |
98%|ββββββββββ| 10495/10682 [1:40:32<01:33, 1.99it/s]
|
1204 |
98%|ββββββββββ| 10496/10682 [1:40:33<01:33, 1.99it/s]
|
1205 |
98%|ββββββββββ| 10497/10682 [1:40:33<01:32, 1.99it/s]
|
1206 |
98%|ββββββββββ| 10498/10682 [1:40:34<01:32, 2.00it/s]
|
1207 |
98%|ββββββββββ| 10499/10682 [1:40:34<01:31, 1.99it/s]
|
1208 |
98%|ββββββββββ| 10500/10682 [1:40:35<01:31, 1.99it/s]{'loss': 3.116, 'grad_norm': 0.24622003734111786, 'learning_rate': 8.841716933915555e-07, 'epoch': 13.76}
|
1209 |
+
|
1210 |
|
1211 |
98%|ββββββββββ| 10500/10682 [1:40:35<01:31, 1.99it/s]
|
1212 |
98%|ββββββββββ| 10501/10682 [1:40:35<01:30, 1.99it/s]
|
1213 |
98%|ββββββββββ| 10502/10682 [1:40:36<01:30, 1.99it/s]
|
1214 |
98%|ββββββββββ| 10503/10682 [1:40:36<01:29, 1.99it/s]
|
1215 |
98%|ββββββββββ| 10504/10682 [1:40:37<01:29, 1.99it/s]
|
1216 |
98%|ββββββββββ| 10505/10682 [1:40:37<01:28, 1.99it/s]
|
1217 |
98%|ββββββββββ| 10506/10682 [1:40:38<01:28, 1.99it/s]
|
1218 |
98%|ββββββββββ| 10507/10682 [1:40:38<01:27, 1.99it/s]
|
1219 |
98%|ββββββββββ| 10508/10682 [1:40:39<01:27, 1.99it/s]
|
1220 |
98%|ββββββββββ| 10509/10682 [1:40:39<01:26, 1.99it/s]
|
1221 |
98%|ββββββββββ| 10510/10682 [1:40:40<01:26, 1.99it/s]
|
1222 |
98%|ββββββββββ| 10511/10682 [1:40:40<01:25, 1.99it/s]
|
1223 |
98%|ββββββββββ| 10512/10682 [1:40:41<01:25, 1.99it/s]
|
1224 |
98%|ββββββββββ| 10513/10682 [1:40:41<01:24, 1.99it/s]
|
1225 |
98%|ββββββββββ| 10514/10682 [1:40:42<01:24, 1.99it/s]
|
1226 |
98%|ββββββββββ| 10515/10682 [1:40:42<01:23, 1.99it/s]
|
1227 |
98%|ββββββββββ| 10516/10682 [1:40:43<01:23, 1.99it/s]
|
1228 |
98%|ββββββββββ| 10517/10682 [1:40:43<01:22, 1.99it/s]
|
1229 |
98%|ββββββββββ| 10518/10682 [1:40:44<01:22, 1.99it/s]
|
1230 |
98%|ββββββββββ| 10519/10682 [1:40:44<01:21, 1.99it/s]
|
1231 |
98%|ββββββββββ| 10520/10682 [1:40:45<01:21, 1.99it/s]
|
1232 |
98%|ββββββββββ| 10521/10682 [1:40:45<01:20, 1.99it/s]
|
1233 |
99%|ββββββββββ| 10522/10682 [1:40:46<01:20, 1.99it/s]
|
1234 |
99%|ββββββββββ| 10523/10682 [1:40:46<01:19, 1.99it/s]
|
1235 |
99%|ββββββββββ| 10524/10682 [1:40:47<01:19, 1.99it/s]
|
1236 |
99%|ββββββββββ| 10525/10682 [1:40:47<01:18, 1.99it/s]{'loss': 3.1108, 'grad_norm': 0.24499115347862244, 'learning_rate': 6.580000036264244e-07, 'epoch': 13.79}
|
1237 |
+
|
1238 |
|
1239 |
99%|ββββββββββ| 10525/10682 [1:40:47<01:18, 1.99it/s]
|
1240 |
99%|ββββββββββ| 10526/10682 [1:40:48<01:18, 1.99it/s]
|
1241 |
99%|ββββββββββ| 10527/10682 [1:40:48<01:17, 1.99it/s]
|
1242 |
99%|ββββββββββ| 10528/10682 [1:40:49<01:17, 1.99it/s]
|
1243 |
99%|ββββββββββ| 10529/10682 [1:40:50<01:16, 1.99it/s]
|
1244 |
99%|ββββββββββ| 10530/10682 [1:40:50<01:16, 1.99it/s]
|
1245 |
99%|ββββββββββ| 10531/10682 [1:40:51<01:15, 1.99it/s]
|
1246 |
99%|ββββββββββ| 10532/10682 [1:40:51<01:15, 1.99it/s]
|
1247 |
99%|ββββββββββ| 10533/10682 [1:40:52<01:14, 1.99it/s]
|
1248 |
99%|ββββββββββ| 10534/10682 [1:40:52<01:14, 1.99it/s]
|
1249 |
99%|ββββββββββ| 10535/10682 [1:40:53<01:13, 1.99it/s]
|
1250 |
99%|ββββββββββ| 10536/10682 [1:40:53<01:13, 1.99it/s]
|
1251 |
99%|ββββββββββ| 10537/10682 [1:40:54<01:12, 1.99it/s]
|
1252 |
99%|ββββββββββ| 10538/10682 [1:40:54<01:12, 1.99it/s]
|
1253 |
99%|ββββββββββ| 10539/10682 [1:40:55<01:11, 1.99it/s]
|
1254 |
99%|ββββββββββ| 10540/10682 [1:40:55<01:11, 2.00it/s]
|
1255 |
99%|ββββββββββ| 10541/10682 [1:40:56<01:10, 2.00it/s]
|
1256 |
99%|ββββββββββ| 10542/10682 [1:40:56<01:10, 1.99it/s]
|
1257 |
99%|ββββββββββ| 10543/10682 [1:40:57<01:09, 1.99it/s]
|
1258 |
99%|ββββββββββ| 10544/10682 [1:40:57<01:09, 1.99it/s]
|
1259 |
99%|ββββββββββ| 10545/10682 [1:40:58<01:08, 1.99it/s]
|
1260 |
99%|ββββββββββ| 10546/10682 [1:40:58<01:08, 1.99it/s]
|
1261 |
99%|ββββββββββ| 10547/10682 [1:40:59<01:07, 1.99it/s]
|
1262 |
99%|ββββββββββ| 10548/10682 [1:40:59<01:07, 1.99it/s]
|
1263 |
99%|ββββββββββ| 10549/10682 [1:41:00<01:06, 1.99it/s]
|
1264 |
99%|ββββββββββ| 10550/10682 [1:41:00<01:06, 1.99it/s]
|
1265 |
{'loss': 3.1109, 'grad_norm': 0.24714048206806183, 'learning_rate': 4.651600211027507e-07, 'epoch': 13.83}
|
1266 |
+
|
1267 |
99%|ββββββββββ| 10550/10682 [1:41:00<01:06, 1.99it/s]
|
1268 |
99%|ββββββββββ| 10551/10682 [1:41:01<01:05, 1.99it/s]
|
1269 |
99%|ββββββββββ| 10552/10682 [1:41:01<01:05, 1.99it/s]
|
1270 |
99%|ββββββββββ| 10553/10682 [1:41:02<01:04, 1.99it/s]
|
1271 |
99%|ββββββββββ| 10554/10682 [1:41:02<01:04, 1.99it/s]
|
1272 |
99%|ββββββββββ| 10555/10682 [1:41:03<01:03, 1.99it/s]
|
1273 |
99%|ββββββββββ| 10556/10682 [1:41:03<01:03, 1.99it/s]
|
1274 |
99%|ββββββββββ| 10557/10682 [1:41:04<01:02, 1.99it/s]
|
1275 |
99%|ββββββββββ| 10558/10682 [1:41:04<01:02, 1.99it/s]
|
1276 |
99%|ββββββββββ| 10559/10682 [1:41:05<01:01, 1.99it/s]
|
1277 |
99%|ββββββββββ| 10560/10682 [1:41:05<01:01, 1.99it/s]
|
1278 |
99%|ββββββββββ| 10561/10682 [1:41:06<01:00, 1.99it/s]
|
1279 |
99%|ββββββββββ| 10562/10682 [1:41:06<01:00, 1.99it/s]
|
1280 |
99%|ββββββββββ| 10563/10682 [1:41:07<00:59, 1.99it/s]
|
1281 |
99%|ββββββββββ| 10564/10682 [1:41:07<00:59, 1.99it/s]
|
1282 |
99%|ββββββββββ| 10565/10682 [1:41:08<00:58, 1.99it/s]
|
1283 |
99%|ββββββββββ| 10566/10682 [1:41:08<00:58, 1.99it/s]
|
1284 |
99%|ββββββββββ| 10567/10682 [1:41:09<00:57, 1.99it/s]
|
1285 |
99%|ββββββββββ| 10568/10682 [1:41:09<00:57, 1.99it/s]
|
1286 |
99%|ββββββββββ| 10569/10682 [1:41:10<00:56, 1.99it/s]
|
1287 |
99%|ββββββββββ| 10570/10682 [1:41:10<00:56, 1.99it/s]
|
1288 |
99%|ββββββββββ| 10571/10682 [1:41:11<00:55, 1.99it/s]
|
1289 |
99%|ββββββββββ| 10572/10682 [1:41:11<00:55, 1.99it/s]
|
1290 |
99%|ββββββββββ| 10573/10682 [1:41:12<00:54, 1.99it/s]
|
1291 |
99%|ββββββββββ| 10574/10682 [1:41:12<00:54, 1.99it/s]
|
1292 |
99%|ββββββββββ| 10575/10682 [1:41:13<00:53, 1.99it/s]{'loss': 3.1055, 'grad_norm': 0.24866575002670288, 'learning_rate': 3.0566461813213986e-07, 'epoch': 13.86}
|
1293 |
+
|
1294 |
|
1295 |
99%|ββββββββββ| 10575/10682 [1:41:13<00:53, 1.99it/s]
|
1296 |
99%|ββββββββββ| 10576/10682 [1:41:13<00:53, 1.99it/s]
|
1297 |
99%|ββββββββββ| 10577/10682 [1:41:14<00:52, 1.99it/s]
|
1298 |
99%|ββββββββββ| 10578/10682 [1:41:14<00:52, 1.99it/s]
|
1299 |
99%|ββββββββββ| 10579/10682 [1:41:15<00:51, 1.99it/s]
|
1300 |
99%|ββββββββββ| 10580/10682 [1:41:15<00:51, 1.99it/s]
|
1301 |
99%|ββββββββββ| 10581/10682 [1:41:16<00:50, 1.99it/s]
|
1302 |
99%|ββββββββββ| 10582/10682 [1:41:16<00:50, 1.99it/s]
|
1303 |
99%|ββββββββββ| 10583/10682 [1:41:17<00:49, 1.99it/s]
|
1304 |
99%|ββββββββββ| 10584/10682 [1:41:17<00:49, 1.99it/s]
|
1305 |
99%|ββββββββββ| 10585/10682 [1:41:18<00:48, 1.99it/s]
|
1306 |
99%|ββββββββββ| 10586/10682 [1:41:18<00:48, 1.99it/s]
|
1307 |
99%|ββββββββββ| 10587/10682 [1:41:19<00:47, 1.99it/s]
|
1308 |
99%|ββββββββββ| 10588/10682 [1:41:19<00:47, 1.99it/s]
|
1309 |
99%|ββββββββββ| 10589/10682 [1:41:20<00:46, 1.99it/s]
|
1310 |
99%|ββββββββββ| 10590/10682 [1:41:20<00:46, 1.99it/s]
|
1311 |
99%|ββββββββββ| 10591/10682 [1:41:21<00:45, 1.99it/s]
|
1312 |
99%|ββββββββββ| 10592/10682 [1:41:21<00:45, 1.99it/s]
|
1313 |
99%|ββββββββββ| 10593/10682 [1:41:22<00:44, 1.99it/s]
|
1314 |
99%|ββββββββββ| 10594/10682 [1:41:22<00:44, 1.99it/s]
|
1315 |
99%|ββββββββββ| 10595/10682 [1:41:23<00:43, 1.99it/s]
|
1316 |
99%|ββββββββββ| 10596/10682 [1:41:23<00:43, 1.99it/s]
|
1317 |
99%|ββββββββββ| 10597/10682 [1:41:24<00:42, 1.99it/s]
|
1318 |
99%|ββββββββββ| 10598/10682 [1:41:24<00:42, 1.97it/s]
|
1319 |
99%|ββββββββββ| 10599/10682 [1:41:25<00:41, 1.98it/s]
|
1320 |
99%|ββββββββββ| 10600/10682 [1:41:25<00:41, 1.98it/s]
|
1321 |
|
1322 |
+
|
1323 |
99%|ββββββββββ| 10600/10682 [1:41:25<00:41, 1.98it/s]
|
1324 |
99%|ββββββββββ| 10601/10682 [1:41:26<00:40, 1.99it/s]
|
1325 |
99%|ββββββββββ| 10602/10682 [1:41:26<00:40, 1.99it/s]
|
1326 |
99%|ββββββββββ| 10603/10682 [1:41:27<00:39, 1.99it/s]
|
1327 |
99%|ββββββββββ| 10604/10682 [1:41:27<00:39, 1.99it/s]
|
1328 |
99%|ββββββββββ| 10605/10682 [1:41:28<00:38, 1.99it/s]
|
1329 |
99%|ββββββββββ| 10606/10682 [1:41:28<00:38, 1.99it/s]
|
1330 |
99%|ββββββββββ| 10607/10682 [1:41:29<00:37, 1.99it/s]
|
1331 |
99%|ββββββββββ| 10608/10682 [1:41:29<00:37, 2.00it/s]
|
1332 |
99%|ββββββββββ| 10609/10682 [1:41:30<00:36, 2.00it/s]
|
1333 |
99%|ββββββββββ| 10610/10682 [1:41:30<00:36, 2.00it/s]
|
1334 |
99%|ββββββββββ| 10611/10682 [1:41:31<00:35, 1.99it/s]
|
1335 |
99%|ββββββββββ| 10612/10682 [1:41:31<00:35, 2.00it/s]
|
1336 |
99%|ββββββββββ| 10613/10682 [1:41:32<00:34, 2.00it/s]
|
1337 |
99%|ββββββββββ| 10614/10682 [1:41:32<00:34, 1.99it/s]
|
1338 |
99%|ββββββββββ| 10615/10682 [1:41:33<00:33, 1.99it/s]
|
1339 |
99%|ββββββββββ| 10616/10682 [1:41:33<00:33, 1.99it/s]
|
1340 |
99%|ββββββββββ| 10617/10682 [1:41:34<00:32, 1.99it/s]
|
1341 |
99%|ββββββββββ| 10618/10682 [1:41:34<00:32, 1.99it/s]
|
1342 |
99%|ββββββββββ| 10619/10682 [1:41:35<00:31, 1.99it/s]
|
1343 |
99%|ββββββββββ| 10620/10682 [1:41:35<00:31, 1.99it/s]
|
1344 |
99%|ββββββββββ| 10621/10682 [1:41:36<00:30, 1.99it/s]
|
1345 |
99%|ββββββββββ| 10622/10682 [1:41:36<00:30, 1.99it/s]
|
1346 |
99%|ββββββββββ| 10623/10682 [1:41:37<00:29, 1.99it/s]
|
1347 |
99%|ββββββββββ| 10624/10682 [1:41:37<00:29, 2.00it/s]
|
1348 |
99%|ββββββββββ| 10625/10682 [1:41:38<00:28, 1.99it/s]{'loss': 3.1138, 'grad_norm': 0.24934372305870056, 'learning_rate': 8.674791042273533e-08, 'epoch': 13.93}
|
1349 |
|
1350 |
+
|
1351 |
99%|ββββββββββ| 10625/10682 [1:41:38<00:28, 1.99it/s]
|
1352 |
99%|ββββββββββ| 10626/10682 [1:41:38<00:28, 1.99it/s]
|
1353 |
99%|ββββββββββ| 10627/10682 [1:41:39<00:27, 1.99it/s]
|
1354 |
99%|ββββββββββ| 10628/10682 [1:41:39<00:27, 2.00it/s]
|
1355 |
|
1356 |
+
|
1357 |
|
1358 |
+
|
1359 |
|
1360 |
+
|
1361 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|