Training in progress, epoch 12
Browse files- logs/events.out.tfevents.1715298064.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +31 -1
logs/events.out.tfevents.1715298064.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d227e76941721f0ac3578b2d44031298dc76b7e911b4e35d50eaaa757e9fa05c
|
3 |
+
size 88382
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf1c832644896eb09d95b6184f4c813d541a78091236581a707b2a7d9fdc96cd
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -452,4 +452,34 @@ Retrying in 1s [Retry 1/5].
|
|
452 |
|
453 |
85%|βββββββββ | 9100/10682 [1:17:21<13:01, 2.02it/s]
|
454 |
85%|βββββββββ | 9101/10682 [1:17:21<13:02, 2.02it/s]
|
455 |
85%|βββββββββ | 9102/10682 [1:17:22<13:01, 2.02it/s]
|
456 |
85%|βββββββββ | 9103/10682 [1:17:22<13:00, 2.02it/s]
|
457 |
85%|βββββββββ | 9104/10682 [1:17:23<13:00, 2.02it/s]
|
458 |
85%|βββββββββ | 9105/10682 [1:17:23<13:00, 2.02it/s]
|
459 |
85%|βββββββββ | 9106/10682 [1:17:24<13:00, 2.02it/s]
|
460 |
85%|βββββββββ | 9107/10682 [1:17:24<12:59, 2.02it/s]
|
461 |
85%|βββββββββ | 9108/10682 [1:17:25<12:58, 2.02it/s]
|
462 |
85%|βββββββββ | 9109/10682 [1:17:25<12:57, 2.02it/s]
|
463 |
85%|βββββββββ | 9110/10682 [1:17:26<12:57, 2.02it/s]
|
464 |
85%|βββββββββ | 9111/10682 [1:17:26<12:56, 2.02it/s]
|
465 |
85%|βββββββββ | 9112/10682 [1:17:27<12:56, 2.02it/s]
|
466 |
85%|βββββββββ | 9113/10682 [1:17:27<12:56, 2.02it/s]
|
467 |
85%|βββββββββ | 9114/10682 [1:17:28<12:54, 2.02it/s]
|
468 |
85%|βββββββββ | 9115/10682 [1:17:28<12:54, 2.02it/s]
|
469 |
85%|βββββββββ | 9116/10682 [1:17:29<12:54, 2.02it/s]
|
470 |
85%|βββββββββ | 9117/10682 [1:17:29<12:54, 2.02it/s]
|
471 |
85%|βββββββββ | 9118/10682 [1:17:30<12:53, 2.02it/s]
|
472 |
85%|βββββββββ | 9119/10682 [1:17:30<12:52, 2.02it/s]
|
473 |
85%|βββββββββ | 9120/10682 [1:17:31<12:52, 2.02it/s]
|
474 |
85%|βββββββββ | 9121/10682 [1:17:31<12:51, 2.02it/s]
|
475 |
85%|βββββββββ | 9122/10682 [1:17:32<12:51, 2.02it/s]
|
476 |
85%|βββββββββ | 9123/10682 [1:17:32<12:51, 2.02it/s]
|
477 |
85%|βββββββββ | 9124/10682 [1:17:33<12:51, 2.02it/s]
|
478 |
85%|βββββββββ | 9125/10682 [1:17:33<12:50, 2.02it/s]{'loss': 2.7976, 'grad_norm': 0.275244802236557, 'learning_rate': 6.334441157888504e-05, 'epoch': 11.95}
|
479 |
|
480 |
|
481 |
85%|βββββββββ | 9125/10682 [1:17:33<12:50, 2.02it/s]
|
482 |
85%|βββββββββ | 9126/10682 [1:17:34<12:52, 2.01it/s]
|
483 |
85%|βββββββββ | 9127/10682 [1:17:34<12:51, 2.02it/s]
|
484 |
85%|βββββββββ | 9128/10682 [1:17:35<12:49, 2.02it/s]
|
485 |
85%|βββββββββ | 9129/10682 [1:17:35<12:49, 2.02it/s]
|
486 |
85%|βββββββββ | 9130/10682 [1:17:36<12:47, 2.02it/s]
|
487 |
85%|βββββββββ | 9131/10682 [1:17:36<12:46, 2.02it/s]
|
488 |
85%|βββββββββ | 9132/10682 [1:17:37<12:46, 2.02it/s]
|
489 |
85%|βββββββββ | 9133/10682 [1:17:37<12:45, 2.02it/s]
|
490 |
86%|βββββββββ | 9134/10682 [1:17:38<12:45, 2.02it/s]
|
491 |
86%|βββββββββ | 9135/10682 [1:17:38<12:44, 2.02it/s]
|
492 |
86%|βββββββββ | 9136/10682 [1:17:39<12:44, 2.02it/s]
|
493 |
86%|βββββββββ | 9137/10682 [1:17:39<12:44, 2.02it/s]
|
494 |
86%|βββββββββ | 9138/10682 [1:17:40<12:43, 2.02it/s]
|
495 |
86%|βββββββββ | 9139/10682 [1:17:40<12:42, 2.02it/s]
|
496 |
86%|βββββββββ | 9140/10682 [1:17:41<12:42, 2.02it/s]
|
497 |
86%|βββββββββ | 9141/10682 [1:17:41<12:41, 2.02it/s]
|
498 |
86%|βββββββββ | 9142/10682 [1:17:42<12:41, 2.02it/s]
|
499 |
86%|βββββββββ | 9143/10682 [1:17:42<12:41, 2.02it/s]
|
500 |
86%|βββββββββ | 9144/10682 [1:17:42<12:41, 2.02it/s]
|
501 |
86%|βββββββββ | 9145/10682 [1:17:43<12:40, 2.02it/s]
|
502 |
86%|βββββββββ | 9146/10682 [1:17:43<12:39, 2.02it/s]
|
503 |
86%|βββββββββ | 9147/10682 [1:17:44<12:38, 2.02it/s]
|
504 |
86%|βββββββββ | 9148/10682 [1:17:44<12:38, 2.02it/s]
|
505 |
86%|βββββββββ | 9149/10682 [1:17:45<12:36, 2.03it/s]
|
506 |
86%|βββββββββ | 9150/10682 [1:17:45<12:36, 2.02it/s]
|
507 |
|
508 |
|
509 |
86%|βββββββββ | 9150/10682 [1:17:45<12:36, 2.02it/s]
|
510 |
86%|βββββββββ | 9151/10682 [1:17:46<12:36, 2.02it/s]
|
511 |
86%|βββββββββ | 9152/10682 [1:17:46<12:36, 2.02it/s]
|
512 |
86%|βββββββββ | 9153/10682 [1:17:47<12:35, 2.02it/s]
|
513 |
86%|βββββββββ | 9154/10682 [1:17:47<12:34, 2.02it/s]
|
514 |
86%|βββββββββ | 9155/10682 [1:17:48<12:34, 2.02it/s]
|
515 |
86%|βββββββββ | 9156/10682 [1:17:48<12:34, 2.02it/s]
|
516 |
86%|βββββββββ | 9157/10682 [1:17:49<12:34, 2.02it/s]
|
517 |
86%|βββββββββ | 9158/10682 [1:17:49<12:34, 2.02it/s]
|
518 |
86%|βββββββββ | 9159/10682 [1:17:50<12:33, 2.02it/s]
|
519 |
86%|βββββββββ | 9160/10682 [1:17:50<12:32, 2.02it/s]
|
520 |
86%|βββββββββ | 9161/10682 [1:17:51<12:32, 2.02it/s]
|
521 |
86%|βββββββββ | 9162/10682 [1:17:51<12:23, 2.04it/s]
|
522 |
86%|βββββββββ | 9163/10682 [1:18:03<1:39:32, 3.93s/it]
|
523 |
86%|βββββββββ | 9164/10682 [1:18:04<1:13:27, 2.90s/it]
|
524 |
86%|βββββββββ | 9165/10682 [1:18:04<55:07, 2.18s/it]
|
525 |
86%|βββββββββ | 9166/10682 [1:18:05<42:23, 1.68s/it]
|
526 |
86%|βββββββββ | 9167/10682 [1:18:05<33:24, 1.32s/it]
|
527 |
86%|βββββββββ | 9168/10682 [1:18:06<27:07, 1.07s/it]
|
528 |
86%|βββββββββ | 9169/10682 [1:18:06<22:43, 1.11it/s]
|
529 |
86%|βββββββββ | 9170/10682 [1:18:07<19:41, 1.28it/s]
|
530 |
86%|βββββββββ | 9171/10682 [1:18:07<17:30, 1.44it/s]
|
531 |
86%|βββββββββ | 9172/10682 [1:18:08<15:58, 1.57it/s]
|
532 |
86%|βββββββββ | 9173/10682 [1:18:08<14:55, 1.69it/s]
|
533 |
86%|βββββββββ | 9174/10682 [1:18:09<14:09, 1.78it/s]
|
534 |
86%|βββββββββ | 9175/10682 [1:18:09<13:39, 1.84it/s]
|
535 |
|
536 |
-
|
537 |
86%|βββββββββ | 9175/10682 [1:18:09<13:39, 1.84it/s]
|
538 |
86%|βββββββββ | 9176/10682 [1:18:10<13:16, 1.89it/s]
|
539 |
86%|βββββββββ | 9177/10682 [1:18:10<13:00, 1.93it/s]
|
540 |
86%|βββββββββ | 9178/10682 [1:18:11<12:49, 1.95it/s]
|
541 |
86%|βββββββββ | 9179/10682 [1:18:11<12:41, 1.97it/s]
|
542 |
86%|βββββββββ | 9180/10682 [1:18:12<12:35, 1.99it/s]
|
543 |
86%|βββββββββ | 9181/10682 [1:18:12<12:32, 1.99it/s]
|
544 |
86%|βββββββββ | 9182/10682 [1:18:13<12:28, 2.00it/s]
|
545 |
86%|βββββββββ | 9183/10682 [1:18:13<12:26, 2.01it/s]
|
546 |
86%|βββββββββ | 9184/10682 [1:18:14<12:24, 2.01it/s]
|
547 |
86%|βββββββββ | 9185/10682 [1:18:14<12:23, 2.01it/s]
|
548 |
86%|βββββββββ | 9186/10682 [1:18:15<12:21, 2.02it/s]
|
|
|
549 |
86%|βββββββββ | 9175/10682 [1:18:09<13:39, 1.84it/s]
|
550 |
86%|βββββββββ | 9176/10682 [1:18:10<13:16, 1.89it/s]
|
551 |
86%|βββββββββ | 9177/10682 [1:18:10<13:00, 1.93it/s]
|
552 |
86%|βββββββββ | 9178/10682 [1:18:11<12:49, 1.95it/s]
|
553 |
86%|βββββββββ | 9179/10682 [1:18:11<12:41, 1.97it/s]
|
554 |
86%|βββββββββ | 9180/10682 [1:18:12<12:35, 1.99it/s]
|
555 |
86%|βββββββββ | 9181/10682 [1:18:12<12:32, 1.99it/s]
|
556 |
86%|βββββββββ | 9182/10682 [1:18:13<12:28, 2.00it/s]
|
557 |
86%|βββββββββ | 9183/10682 [1:18:13<12:26, 2.01it/s]
|
558 |
86%|βββββββββ | 9184/10682 [1:18:14<12:24, 2.01it/s]
|
559 |
86%|βββββββββ | 9185/10682 [1:18:14<12:23, 2.01it/s]
|
560 |
86%|βββββββββ | 9186/10682 [1:18:15<12:21, 2.02it/s]
|
561 |
86%|βββββββββ | 9187/10682 [1:18:15<12:25, 2.01it/s]
|
562 |
86%|βββββββββ | 9188/10682 [1:18:16<12:23, 2.01it/s]
|
563 |
86%|βββββββββ | 9189/10682 [1:18:16<12:21, 2.01it/s]
|
564 |
86%|βββββββββ | 9190/10682 [1:18:17<12:20, 2.02it/s]
|
565 |
86%|βββββββββ | 9191/10682 [1:18:17<12:18, 2.02it/s]
|
566 |
86%|βββββββββ | 9192/10682 [1:18:18<12:18, 2.02it/s]
|
567 |
86%|βββββββββ | 9193/10682 [1:18:18<12:16, 2.02it/s]
|
568 |
86%|βββββββββ | 9194/10682 [1:18:19<12:15, 2.02it/s]
|
569 |
86%|βββββββββ | 9195/10682 [1:18:19<12:14, 2.02it/s]
|
570 |
86%|βββββββββ | 9196/10682 [1:18:20<12:14, 2.02it/s]
|
571 |
86%|βββββββββ | 9197/10682 [1:18:20<12:13, 2.02it/s]
|
572 |
86%|βββββββββ | 9198/10682 [1:18:21<12:12, 2.02it/s]
|
573 |
86%|βββββββββ | 9199/10682 [1:18:21<12:12, 2.02it/s]
|
574 |
86%|βββββββββ | 9200/10682 [1:18:22<12:12, 2.02it/s]{'loss': 2.7368, 'grad_norm': 0.28075122833251953, 'learning_rate': 5.7505864256519716e-05, 'epoch': 12.05}
|
575 |
|
|
|
576 |
86%|βββββββββ | 9200/10682 [1:18:22<12:12, 2.02it/s]
|
577 |
86%|βββββββββ | 9201/10682 [1:18:22<12:12, 2.02it/s]
|
578 |
86%|βββββββββ | 9202/10682 [1:18:23<12:11, 2.02it/s]
|
579 |
86%|βββββββββ | 9203/10682 [1:18:23<12:10, 2.02it/s]
|
580 |
86%|βββββββββ | 9204/10682 [1:18:24<12:10, 2.02it/s]
|
581 |
86%|βββββββββ | 9205/10682 [1:18:24<12:09, 2.02it/s]
|
582 |
86%|βββββββββ | 9206/10682 [1:18:25<12:09, 2.02it/s]
|
583 |
86%|βββββββββ | 9207/10682 [1:18:25<12:08, 2.02it/s]
|
584 |
86%|βββββββββ | 9208/10682 [1:18:26<12:07, 2.03it/s]
|
585 |
86%|βββββββββ | 9209/10682 [1:18:26<12:07, 2.02it/s]
|
586 |
86%|βββββββββ | 9210/10682 [1:18:27<12:06, 2.03it/s]
|
587 |
86%|βββββββββ | 9211/10682 [1:18:27<12:06, 2.02it/s]
|
588 |
86%|βββββββββ | 9212/10682 [1:18:28<12:05, 2.02it/s]
|
589 |
86%|βββββββββ | 9213/10682 [1:18:28<12:05, 2.02it/s]
|
590 |
86%|βββββββββ | 9214/10682 [1:18:29<12:04, 2.03it/s]
|
591 |
86%|βββββββββ | 9215/10682 [1:18:29<12:05, 2.02it/s]
|
592 |
86%|βββββββββ | 9216/10682 [1:18:30<12:04, 2.02it/s]
|
593 |
86%|βββββββββ | 9217/10682 [1:18:30<12:04, 2.02it/s]
|
594 |
86%|βββββββββ | 9218/10682 [1:18:31<12:03, 2.02it/s]
|
595 |
86%|βββββββββ | 9219/10682 [1:18:31<12:03, 2.02it/s]
|
596 |
86%|βββββββββ | 9220/10682 [1:18:32<12:03, 2.02it/s]
|
597 |
86%|βββββββββ | 9221/10682 [1:18:32<12:03, 2.02it/s]
|
598 |
86%|βββββββββ | 9222/10682 [1:18:33<12:01, 2.02it/s]
|
599 |
86%|βββββββββ | 9223/10682 [1:18:33<12:01, 2.02it/s]
|
600 |
86%|βββββββββ | 9224/10682 [1:18:34<12:00, 2.02it/s]
|
601 |
86%|βββββββββ | 9225/10682 [1:18:34<11:59, 2.02it/s]{'loss': 2.7433, 'grad_norm': 0.2763462960720062, 'learning_rate': 5.561858464291258e-05, 'epoch': 12.08}
|
|
|
602 |
|
603 |
86%|βββββββββ | 9225/10682 [1:18:34<11:59, 2.02it/s]
|
604 |
86%|βββββββββ | 9226/10682 [1:18:35<11:59, 2.02it/s]
|
605 |
86%|βββββββββ | 9227/10682 [1:18:35<12:00, 2.02it/s]
|
606 |
86%|βββββββββ | 9228/10682 [1:18:36<11:58, 2.02it/s]
|
607 |
86%|βββββββββ | 9229/10682 [1:18:36<11:58, 2.02it/s]
|
608 |
86%|βββββββββ | 9230/10682 [1:18:37<11:57, 2.02it/s]
|
609 |
86%|βββββββββ | 9231/10682 [1:18:37<11:57, 2.02it/s]
|
610 |
86%|βββββββββ | 9232/10682 [1:18:37<11:56, 2.02it/s]
|
611 |
86%|βββββββββ | 9233/10682 [1:18:38<11:56, 2.02it/s]
|
612 |
86%|βββββββββ | 9234/10682 [1:18:38<11:55, 2.02it/s]
|
613 |
86%|βββββββββ | 9235/10682 [1:18:39<11:55, 2.02it/s]
|
614 |
86%|βββββββββ | 9236/10682 [1:18:39<11:54, 2.02it/s]
|
615 |
86%|βββββββββ | 9237/10682 [1:18:40<11:54, 2.02it/s]
|
616 |
86%|βββββββββ | 9238/10682 [1:18:40<11:54, 2.02it/s]
|
617 |
86%|βββββββββ | 9239/10682 [1:18:41<11:53, 2.02it/s]
|
618 |
87%|βββββββββ | 9240/10682 [1:18:41<11:52, 2.02it/s]
|
619 |
87%|βββββββββ | 9241/10682 [1:18:42<11:53, 2.02it/s]
|
620 |
87%|βββββββββ | 9242/10682 [1:18:42<11:52, 2.02it/s]
|
621 |
87%|βββββββββ | 9243/10682 [1:18:43<11:51, 2.02it/s]
|
622 |
87%|βββββββββ | 9244/10682 [1:18:43<11:51, 2.02it/s]
|
623 |
87%|βββββββββ | 9245/10682 [1:18:44<11:50, 2.02it/s]
|
624 |
87%|βββββββββ | 9246/10682 [1:18:44<11:50, 2.02it/s]
|
625 |
87%|βββββββββ | 9247/10682 [1:18:45<11:49, 2.02it/s]
|
626 |
87%|βββββββββ | 9248/10682 [1:18:45<11:49, 2.02it/s]
|
627 |
87%|βββββββββ | 9249/10682 [1:18:46<11:47, 2.02it/s]
|
628 |
87%|βββββββββ | 9250/10682 [1:18:46<11:47, 2.02it/s]
|
629 |
|
|
|
630 |
87%|βββββββββ | 9250/10682 [1:18:46<11:47, 2.02it/s]
|
631 |
87%|βββββββββ | 9251/10682 [1:18:47<11:47, 2.02it/s]
|
632 |
87%|βββββββββ | 9252/10682 [1:18:47<11:46, 2.02it/s]
|
633 |
87%|βββββββββ | 9253/10682 [1:18:48<11:45, 2.03it/s]
|
634 |
87%|βββββββββ | 9254/10682 [1:18:48<11:45, 2.02it/s]
|
635 |
87%|βββββββββ | 9255/10682 [1:18:49<11:44, 2.02it/s]
|
636 |
87%|βββββββββ | 9256/10682 [1:18:49<11:45, 2.02it/s]
|
637 |
87%|βββββββββ | 9257/10682 [1:18:50<11:44, 2.02it/s]
|
638 |
87%|βββββββββ | 9258/10682 [1:18:50<11:44, 2.02it/s]
|
639 |
87%|βββββββββ | 9259/10682 [1:18:51<11:43, 2.02it/s]
|
640 |
87%|βββββββββ | 9260/10682 [1:18:51<11:43, 2.02it/s]
|
641 |
87%|βββββββββ | 9261/10682 [1:18:52<11:42, 2.02it/s]
|
642 |
87%|βββββββββ | 9262/10682 [1:18:52<11:41, 2.02it/s]
|
643 |
87%|βββββββββ | 9263/10682 [1:18:53<11:41, 2.02it/s]
|
644 |
87%|βββββββββ | 9264/10682 [1:18:53<11:40, 2.02it/s]
|
645 |
87%|βββββββββ | 9265/10682 [1:18:54<11:40, 2.02it/s]
|
646 |
87%|βββββββββ | 9266/10682 [1:18:54<11:39, 2.02it/s]
|
647 |
87%|βββββββββ | 9267/10682 [1:18:55<11:39, 2.02it/s]
|
648 |
87%|βββββββββ | 9268/10682 [1:18:55<11:38, 2.02it/s]
|
649 |
87%|βββββββββ | 9269/10682 [1:18:56<11:38, 2.02it/s]
|
650 |
87%|βββββββββ | 9270/10682 [1:18:56<11:37, 2.02it/s]
|
651 |
87%|βββββββββ | 9271/10682 [1:18:57<11:37, 2.02it/s]
|
652 |
87%|βββββββββ | 9272/10682 [1:18:57<11:36, 2.02it/s]
|
653 |
87%|βββββββββ | 9273/10682 [1:18:58<11:36, 2.02it/s]
|
654 |
87%|βββββββββ | 9274/10682 [1:18:58<11:35, 2.02it/s]
|
655 |
87%|βββββββββ | 9275/10682 [1:18:59<11:35, 2.02it/s]
|
656 |
|
|
|
657 |
87%|βββββββββ | 9275/10682 [1:18:59<11:35, 2.02it/s]
|
658 |
87%|βββββββββ | 9276/10682 [1:18:59<11:35, 2.02it/s]
|
659 |
87%|βββββββββ | 9277/10682 [1:19:00<11:35, 2.02it/s]
|
660 |
87%|βββββββββ | 9278/10682 [1:19:00<11:34, 2.02it/s]
|
661 |
87%|βββββββββ | 9279/10682 [1:19:01<11:34, 2.02it/s]
|
662 |
87%|βββββββββ | 9280/10682 [1:19:01<11:32, 2.02it/s]
|
663 |
87%|βββββββββ | 9281/10682 [1:19:02<11:32, 2.02it/s]
|
664 |
87%|βββββββββ | 9282/10682 [1:19:02<11:31, 2.02it/s]
|
665 |
87%|βββββββββ | 9283/10682 [1:19:03<11:31, 2.02it/s]
|
666 |
87%|βββββββββ | 9284/10682 [1:19:03<11:30, 2.02it/s]
|
667 |
87%|βββββββββ | 9285/10682 [1:19:04<11:31, 2.02it/s]
|
668 |
87%|βββββββββ | 9286/10682 [1:19:04<11:30, 2.02it/s]
|
669 |
87%|βββββββββ | 9287/10682 [1:19:05<11:30, 2.02it/s]
|
670 |
87%|βββββββββ | 9288/10682 [1:19:05<11:29, 2.02it/s]
|
671 |
87%|βββββββββ | 9289/10682 [1:19:06<11:29, 2.02it/s]
|
672 |
87%|βββββββββ | 9290/10682 [1:19:06<11:28, 2.02it/s]
|
673 |
87%|βββββββββ | 9291/10682 [1:19:07<11:28, 2.02it/s]
|
674 |
87%|βββββββββ | 9292/10682 [1:19:07<11:28, 2.02it/s]
|
675 |
87%|βββββββββ | 9293/10682 [1:19:08<11:27, 2.02it/s]
|
676 |
87%|βββββββββ | 9294/10682 [1:19:08<11:27, 2.02it/s]
|
677 |
87%|βββββββββ | 9295/10682 [1:19:09<11:26, 2.02it/s]
|
678 |
87%|βββββββββ | 9296/10682 [1:19:09<11:25, 2.02it/s]
|
679 |
87%|βββββββββ | 9297/10682 [1:19:10<11:25, 2.02it/s]
|
680 |
87%|βββββββββ | 9298/10682 [1:19:10<11:24, 2.02it/s]
|
681 |
87%|βββββββββ | 9299/10682 [1:19:11<11:24, 2.02it/s]
|
682 |
87%|βββββββββ | 9300/10682 [1:19:11<11:24, 2.02it/s]{'loss': 2.7346, 'grad_norm': 0.27623340487480164, 'learning_rate': 5.0135217920839137e-05, 'epoch': 12.18}
|
|
|
683 |
|
684 |
87%|βββββββββ | 9300/10682 [1:19:11<11:24, 2.02it/s]
|
685 |
87%|βββββββββ | 9301/10682 [1:19:12<11:24, 2.02it/s]
|
686 |
87%|βββββββββ | 9302/10682 [1:19:12<11:22, 2.02it/s]
|
687 |
87%|βββββββββ | 9303/10682 [1:19:13<11:22, 2.02it/s]
|
688 |
87%|βββββββββ | 9304/10682 [1:19:13<11:21, 2.02it/s]
|
689 |
87%|βββββββββ | 9305/10682 [1:19:14<11:21, 2.02it/s]
|
690 |
87%|βββββββββ | 9306/10682 [1:19:14<11:19, 2.02it/s]
|
691 |
87%|βββββββββ | 9307/10682 [1:19:15<11:19, 2.02it/s]
|
692 |
87%|βββββββββ | 9308/10682 [1:19:15<11:18, 2.02it/s]
|
693 |
87%|βββββββββ | 9309/10682 [1:19:16<11:18, 2.02it/s]
|
694 |
87%|βββββββββ | 9310/10682 [1:19:16<11:17, 2.02it/s]
|
695 |
87%|βββββββββ | 9311/10682 [1:19:17<11:17, 2.02it/s]
|
696 |
87%|βββββββββ | 9312/10682 [1:19:17<11:16, 2.03it/s]
|
697 |
87%|βββββββββ | 9313/10682 [1:19:18<11:16, 2.02it/s]
|
698 |
87%|βββββββββ | 9314/10682 [1:19:18<11:15, 2.02it/s]
|
699 |
87%|βββββββββ | 9315/10682 [1:19:19<11:15, 2.02it/s]
|
700 |
87%|βββββββββ | 9316/10682 [1:19:19<11:14, 2.02it/s]
|
701 |
87%|βββββββββ | 9317/10682 [1:19:20<11:14, 2.02it/s]
|
702 |
87%|βββββββββ | 9318/10682 [1:19:20<11:13, 2.02it/s]
|
703 |
87%|βββββββββ | 9319/10682 [1:19:21<11:15, 2.02it/s]
|
704 |
87%|βββββββββ | 9320/10682 [1:19:21<11:14, 2.02it/s]
|
705 |
87%|βββββββββ | 9321/10682 [1:19:22<11:13, 2.02it/s]
|
706 |
87%|βββββββββ | 9322/10682 [1:19:22<11:12, 2.02it/s]
|
707 |
87%|βββββββββ | 9323/10682 [1:19:22<11:11, 2.02it/s]
|
708 |
87%|βββββββββ | 9324/10682 [1:19:23<11:11, 2.02it/s]
|
709 |
87%|βββββββββ | 9325/10682 [1:19:23<11:11, 2.02it/s]{'loss': 2.7366, 'grad_norm': 0.2855311632156372, 'learning_rate': 4.836732641133895e-05, 'epoch': 12.21}
|
710 |
|
|
|
711 |
87%|βββββββββ | 9325/10682 [1:19:23<11:11, 2.02it/s]
|
712 |
87%|βββββββββ | 9326/10682 [1:19:24<11:11, 2.02it/s]
|
713 |
87%|βββββββββ | 9327/10682 [1:19:24<11:10, 2.02it/s]
|
714 |
87%|βββββββββ | 9328/10682 [1:19:25<11:09, 2.02it/s]
|
715 |
87%|βββββββββ | 9329/10682 [1:19:25<11:08, 2.02it/s]
|
716 |
87%|βββββββββ | 9330/10682 [1:19:26<11:08, 2.02it/s]
|
717 |
87%|βββββββββ | 9331/10682 [1:19:26<11:08, 2.02it/s]
|
718 |
87%|βββββββββ | 9332/10682 [1:19:27<11:07, 2.02it/s]
|
719 |
87%|βββββββββ | 9333/10682 [1:19:27<11:06, 2.02it/s]
|
720 |
87%|βββββββββ | 9334/10682 [1:19:28<11:07, 2.02it/s]
|
721 |
87%|βββββββββ | 9335/10682 [1:19:28<11:06, 2.02it/s]
|
722 |
87%|βββββββββ | 9336/10682 [1:19:29<11:05, 2.02it/s]
|
723 |
87%|βββββββββ | 9337/10682 [1:19:29<11:05, 2.02it/s]
|
724 |
87%|βββοΏ½οΏ½βββββ | 9338/10682 [1:19:30<11:04, 2.02it/s]
|
725 |
87%|βββββββββ | 9339/10682 [1:19:30<11:04, 2.02it/s]
|
726 |
87%|βββββββββ | 9340/10682 [1:19:31<11:03, 2.02it/s]
|
727 |
87%|βββββββββ | 9341/10682 [1:19:31<11:02, 2.02it/s]
|
728 |
87%|βββββββββ | 9342/10682 [1:19:32<11:02, 2.02it/s]
|
729 |
87%|βββββββββ | 9343/10682 [1:19:32<11:01, 2.02it/s]
|
730 |
87%|βββββββββ | 9344/10682 [1:19:33<11:01, 2.02it/s]
|
731 |
87%|βββββββββ | 9345/10682 [1:19:33<11:00, 2.02it/s]
|
732 |
87%|βββββββββ | 9346/10682 [1:19:34<11:00, 2.02it/s]
|
733 |
88%|βββββββββ | 9347/10682 [1:19:34<11:00, 2.02it/s]
|
734 |
88%|βββββββββ | 9348/10682 [1:19:35<10:59, 2.02it/s]
|
735 |
88%|βββββββββ | 9349/10682 [1:19:35<10:59, 2.02it/s]
|
736 |
88%|βββββββββ | 9350/10682 [1:19:36<10:58, 2.02it/s]{'loss': 2.7481, 'grad_norm': 0.28073567152023315, 'learning_rate': 4.662958195146971e-05, 'epoch': 12.25}
|
|
|
737 |
|
738 |
88%|βββββββββ | 9350/10682 [1:19:36<10:58, 2.02it/s]
|
739 |
88%|βββββββββ | 9351/10682 [1:19:36<10:58, 2.02it/s]
|
740 |
88%|βββββββββ | 9352/10682 [1:19:37<10:57, 2.02it/s]
|
741 |
88%|βββββββββ | 9353/10682 [1:19:37<10:56, 2.02it/s]
|
742 |
88%|βββββββββ | 9354/10682 [1:19:38<10:56, 2.02it/s]
|
743 |
88%|βββββββββ | 9355/10682 [1:19:38<10:55, 2.02it/s]
|
744 |
88%|βββββββββ | 9356/10682 [1:19:39<10:55, 2.02it/s]
|
745 |
88%|βββββββββ | 9357/10682 [1:19:39<10:55, 2.02it/s]
|
746 |
88%|βββββββββ | 9358/10682 [1:19:40<10:54, 2.02it/s]
|
747 |
88%|βββββββββ | 9359/10682 [1:19:40<10:54, 2.02it/s]
|
748 |
88%|βββββββββ | 9360/10682 [1:19:41<10:54, 2.02it/s]
|
749 |
88%|βββββββββ | 9361/10682 [1:19:41<10:54, 2.02it/s]
|
750 |
88%|βββββββββ | 9362/10682 [1:19:42<10:53, 2.02it/s]
|
751 |
88%|βββββββββ | 9363/10682 [1:19:42<10:53, 2.02it/s]
|
752 |
88%|βββββββββ | 9364/10682 [1:19:43<10:52, 2.02it/s]
|
753 |
88%|βββββββββ | 9365/10682 [1:19:43<10:51, 2.02it/s]
|
754 |
88%|βββββββββ | 9366/10682 [1:19:44<10:50, 2.02it/s]
|
755 |
88%|βββββββββ | 9367/10682 [1:19:44<10:50, 2.02it/s]
|
756 |
88%|βββββββββ | 9368/10682 [1:19:45<10:49, 2.02it/s]
|
757 |
88%|βββββββββ | 9369/10682 [1:19:45<10:49, 2.02it/s]
|
758 |
88%|βββββββββ | 9370/10682 [1:19:46<10:48, 2.02it/s]
|
759 |
88%|βββββββββ | 9371/10682 [1:19:46<10:48, 2.02it/s]
|
760 |
88%|βββββββββ | 9372/10682 [1:19:47<10:47, 2.02it/s]
|
761 |
88%|βββββββββ | 9373/10682 [1:19:47<10:47, 2.02it/s]
|
762 |
88%|βββββββββ | 9374/10682 [1:19:48<10:46, 2.02it/s]
|
763 |
88%|βββββββββ | 9375/10682 [1:19:48<10:45, 2.03it/s]
|
764 |
|
|
|
765 |
88%|βββββββββ | 9375/10682 [1:19:48<10:45, 2.03it/s]
|
766 |
88%|βββββββββ | 9376/10682 [1:19:49<10:45, 2.02it/s]
|
767 |
88%|βββββββββ | 9377/10682 [1:19:49<10:44, 2.02it/s]
|
768 |
88%|βββββββββ | 9378/10682 [1:19:50<10:44, 2.02it/s]
|
769 |
88%|βββββββββ | 9379/10682 [1:19:50<10:43, 2.02it/s]
|
770 |
88%|βββββββββ | 9380/10682 [1:19:51<10:44, 2.02it/s]
|
771 |
88%|βββββββββ | 9381/10682 [1:19:51<10:42, 2.02it/s]
|
772 |
88%|βββββββββ | 9382/10682 [1:19:52<10:42, 2.02it/s]
|
773 |
88%|βββββββββ | 9383/10682 [1:19:52<10:41, 2.03it/s]
|
774 |
88%|βββββββββ | 9384/10682 [1:19:53<10:41, 2.02it/s]
|
775 |
88%|βββββββββ | 9385/10682 [1:19:53<10:40, 2.03it/s]
|
776 |
88%|βββββββββ | 9386/10682 [1:19:54<10:40, 2.02it/s]
|
777 |
88%|βββββββββ | 9387/10682 [1:19:54<10:39, 2.02it/s]
|
778 |
88%|βββββββββ | 9388/10682 [1:19:55<10:39, 2.02it/s]
|
779 |
88%|βββββββββ | 9389/10682 [1:19:55<10:39, 2.02it/s]
|
780 |
88%|βββββββββ | 9390/10682 [1:19:56<10:37, 2.03it/s]
|
781 |
88%|βββββββββ | 9391/10682 [1:19:56<10:38, 2.02it/s]
|
782 |
88%|βββββββββ | 9392/10682 [1:19:57<10:37, 2.02it/s]
|
783 |
88%|βββββββββ | 9393/10682 [1:19:57<10:36, 2.02it/s]
|
784 |
88%|βββββββββ | 9394/10682 [1:19:58<10:35, 2.03it/s]
|
785 |
88%|βββββββββ | 9395/10682 [1:19:58<10:35, 2.02it/s]
|
786 |
88%|βββββββββ | 9396/10682 [1:19:59<10:35, 2.02it/s]
|
787 |
88%|βββββββββ | 9397/10682 [1:19:59<10:35, 2.02it/s]
|
788 |
88%|βββββββββ | 9398/10682 [1:20:00<10:34, 2.02it/s]
|
789 |
88%|βββββββββ | 9399/10682 [1:20:00<10:34, 2.02it/s]
|
790 |
88%|βββββββββ | 9400/10682 [1:20:01<10:33, 2.02it/s]{'loss': 2.7415, 'grad_norm': 0.27810999751091003, 'learning_rate': 4.3244996147050855e-05, 'epoch': 12.31}
|
791 |
|
|
|
792 |
88%|βββββββββ | 9400/10682 [1:20:01<10:33, 2.02it/s]
|
793 |
88%|βββββββββ | 9401/10682 [1:20:01<10:34, 2.02it/s]
|
794 |
88%|βββββββββ | 9402/10682 [1:20:02<10:33, 2.02it/s]
|
795 |
88%|βββββββββ | 9403/10682 [1:20:02<10:32, 2.02it/s]
|
796 |
88%|βββββββββ | 9404/10682 [1:20:03<10:32, 2.02it/s]
|
797 |
88%|βββββββββ | 9405/10682 [1:20:03<10:32, 2.02it/s]
|
798 |
88%|βββββββββ | 9406/10682 [1:20:04<10:31, 2.02it/s]
|
799 |
88%|βββββββββ | 9407/10682 [1:20:04<10:30, 2.02it/s]
|
800 |
88%|βββββββββ | 9408/10682 [1:20:05<10:30, 2.02it/s]
|
801 |
88%|βββββββββ | 9409/10682 [1:20:05<10:29, 2.02it/s]
|
802 |
88%|βββββββββ | 9410/10682 [1:20:06<10:29, 2.02it/s]
|
803 |
88%|βββββββββ | 9411/10682 [1:20:06<10:28, 2.02it/s]
|
804 |
88%|βββββββββ | 9412/10682 [1:20:07<10:28, 2.02it/s]
|
805 |
88%|βββββββββ | 9413/10682 [1:20:07<10:27, 2.02it/s]
|
806 |
88%|βββββββββ | 9414/10682 [1:20:07<10:27, 2.02it/s]
|
807 |
88%|βββββββββ | 9415/10682 [1:20:08<10:26, 2.02it/s]
|
808 |
88%|βββββββββ | 9416/10682 [1:20:08<10:26, 2.02it/s]
|
809 |
88%|βββββββββ | 9417/10682 [1:20:09<10:26, 2.02it/s]
|
810 |
88%|βββββββββ | 9418/10682 [1:20:09<10:25, 2.02it/s]
|
811 |
88%|βββββββββ | 9419/10682 [1:20:10<10:25, 2.02it/s]
|
812 |
88%|βββββββββ | 9420/10682 [1:20:10<10:24, 2.02it/s]
|
813 |
88%|βββββββββ | 9421/10682 [1:20:11<10:23, 2.02it/s]
|
814 |
88%|βββββββββ | 9422/10682 [1:20:11<10:23, 2.02it/s]
|
815 |
88%|βββββββββ | 9423/10682 [1:20:12<10:23, 2.02it/s]
|
816 |
88%|βββββββββ | 9424/10682 [1:20:12<10:22, 2.02it/s]
|
817 |
88%|βββββββββ | 9425/10682 [1:20:13<10:21, 2.02it/s]
|
818 |
|
|
|
819 |
88%|βββββββββ | 9425/10682 [1:20:13<10:21, 2.02it/s]
|
820 |
88%|βββββββββ | 9426/10682 [1:20:13<10:21, 2.02it/s]
|
821 |
88%|βββββββββ | 9427/10682 [1:20:14<10:21, 2.02it/s]
|
822 |
88%|βββββββββ | 9428/10682 [1:20:14<10:20, 2.02it/s]
|
823 |
88%|βββββββββ | 9429/10682 [1:20:15<10:19, 2.02it/s]
|
824 |
88%|βββββββββ | 9430/10682 [1:20:15<10:18, 2.02it/s]
|
825 |
88%|βββββββββ | 9431/10682 [1:20:16<10:18, 2.02it/s]
|
826 |
88%|βββββββββ | 9432/10682 [1:20:16<10:17, 2.02it/s]
|
827 |
88%|βββββββββ | 9433/10682 [1:20:17<10:17, 2.02it/s]
|
828 |
88%|βββββββββ | 9434/10682 [1:20:17<10:16, 2.02it/s]
|
829 |
88%|βββββββββ | 9435/10682 [1:20:18<10:16, 2.02it/s]
|
830 |
88%|βββββββββ | 9436/10682 [1:20:18<10:15, 2.02it/s]
|
831 |
88%|βββββββββ | 9437/10682 [1:20:19<10:15, 2.02it/s]
|
832 |
88%|βββββββββ | 9438/10682 [1:20:19<10:14, 2.02it/s]
|
833 |
88%|βββββββββ | 9439/10682 [1:20:20<10:14, 2.02it/s]
|
834 |
88%|βββββββββ | 9440/10682 [1:20:20<10:13, 2.02it/s]
|
835 |
88%|βββββββββ | 9441/10682 [1:20:21<10:13, 2.02it/s]
|
836 |
88%|βββββββββ | 9442/10682 [1:20:21<10:12, 2.02it/s]
|
837 |
88%|βββββββββ | 9443/10682 [1:20:22<10:12, 2.02it/s]
|
838 |
88%|βββββββββ | 9444/10682 [1:20:22<10:11, 2.02it/s]
|
839 |
88%|βββββββββ | 9445/10682 [1:20:23<10:12, 2.02it/s]
|
840 |
88%|βββββββββ | 9446/10682 [1:20:23<10:11, 2.02it/s]
|
841 |
88%|βββββββββ | 9447/10682 [1:20:24<10:10, 2.02it/s]
|
842 |
88%|βββββββββ | 9448/10682 [1:20:24<10:09, 2.02it/s]
|
843 |
88%|βββββββββ | 9449/10682 [1:20:25<10:08, 2.03it/s]
|
844 |
88%|βββββββββ | 9450/10682 [1:20:25<10:08, 2.02it/s]
|
845 |
{'loss': 2.7417, 'grad_norm': 0.2766771912574768, 'learning_rate': 3.998236419395806e-05, 'epoch': 12.38}
|
|
|
846 |
88%|βββββββββ | 9450/10682 [1:20:25<10:08, 2.02it/s]
|
847 |
88%|βββββββββ | 9451/10682 [1:20:26<10:08, 2.02it/s]
|
848 |
88%|βββββββββ | 9452/10682 [1:20:26<10:07, 2.02it/s]
|
849 |
88%|βββββββββ | 9453/10682 [1:20:27<10:07, 2.02it/s]
|
850 |
89%|βββββββββ | 9454/10682 [1:20:27<10:06, 2.02it/s]
|
851 |
89%|βββββββββ | 9455/10682 [1:20:28<10:06, 2.02it/s]
|
852 |
89%|βββββββββ | 9456/10682 [1:20:28<10:05, 2.02it/s]
|
853 |
89%|βββββββββ | 9457/10682 [1:20:29<10:05, 2.02it/s]
|
854 |
89%|βββββββββ | 9458/10682 [1:20:29<10:05, 2.02it/s]
|
855 |
89%|βββββββββ | 9459/10682 [1:20:30<10:57, 1.86it/s]
|
856 |
89%|βββββββββ | 9460/10682 [1:20:30<10:40, 1.91it/s]
|
857 |
89%|βββββββββ | 9461/10682 [1:20:31<10:29, 1.94it/s]
|
858 |
89%|ββββοΏ½οΏ½ββββ | 9462/10682 [1:20:31<10:21, 1.96it/s]
|
859 |
89%|βββββββββ | 9463/10682 [1:20:32<10:15, 1.98it/s]
|
860 |
89%|βββββββββ | 9464/10682 [1:20:32<10:10, 1.99it/s]
|
861 |
89%|βββββββββ | 9465/10682 [1:20:33<10:07, 2.00it/s]
|
862 |
89%|βββββββββ | 9466/10682 [1:20:33<10:05, 2.01it/s]
|
863 |
89%|βββββββββ | 9467/10682 [1:20:34<10:02, 2.02it/s]
|
864 |
89%|βββββββββ | 9468/10682 [1:20:34<10:01, 2.02it/s]
|
865 |
89%|βββββββββ | 9469/10682 [1:20:35<10:00, 2.02it/s]
|
866 |
89%|βββββββββ | 9470/10682 [1:20:35<09:59, 2.02it/s]
|
867 |
89%|βββββββββ | 9471/10682 [1:20:36<09:58, 2.02it/s]
|
868 |
89%|βββββββββ | 9472/10682 [1:20:36<09:58, 2.02it/s]
|
869 |
89%|βββββββββ | 9473/10682 [1:20:37<09:57, 2.02it/s]
|
870 |
89%|βββββββββ | 9474/10682 [1:20:37<09:56, 2.02it/s]
|
871 |
89%|βββββββββ | 9475/10682 [1:20:38<09:55, 2.03it/s]{'loss': 2.7382, 'grad_norm': 0.27711209654808044, 'learning_rate': 3.839705441646779e-05, 'epoch': 12.41}
|
872 |
|
|
|
873 |
89%|βββββββββ | 9475/10682 [1:20:38<09:55, 2.03it/s]
|
874 |
89%|βββββββββ | 9476/10682 [1:20:38<09:58, 2.02it/s]
|
875 |
89%|βββββββββ | 9477/10682 [1:20:39<09:56, 2.02it/s]
|
876 |
89%|βββββββββ | 9478/10682 [1:20:39<09:56, 2.02it/s]
|
877 |
89%|βββββββββ | 9479/10682 [1:20:40<09:55, 2.02it/s]
|
878 |
89%|βββββββββ | 9480/10682 [1:20:40<09:54, 2.02it/s]
|
879 |
89%|βββββββββ | 9481/10682 [1:20:41<09:53, 2.02it/s]
|
880 |
89%|βββββββββ | 9482/10682 [1:20:41<09:53, 2.02it/s]
|
881 |
89%|βββββββββ | 9483/10682 [1:20:42<09:53, 2.02it/s]
|
882 |
89%|βββββββββ | 9484/10682 [1:20:42<09:52, 2.02it/s]
|
883 |
89%|βββββββββ | 9485/10682 [1:20:43<09:51, 2.02it/s]
|
884 |
89%|βββββββββ | 9486/10682 [1:20:43<09:51, 2.02it/s]
|
885 |
89%|βββββββββ | 9487/10682 [1:20:44<09:50, 2.02it/s]
|
886 |
89%|βββββββββ | 9488/10682 [1:20:44<09:50, 2.02it/s]
|
887 |
89%|βββββββββ | 9489/10682 [1:20:45<09:49, 2.02it/s]
|
888 |
89%|βββββββββ | 9490/10682 [1:20:45<09:49, 2.02it/s]
|
889 |
89%|βββββββββ | 9491/10682 [1:20:46<09:48, 2.02it/s]
|
890 |
89%|βββββββββ | 9492/10682 [1:20:46<09:47, 2.02it/s]
|
891 |
89%|βββββββββ | 9493/10682 [1:20:47<09:46, 2.03it/s]
|
892 |
89%|βββββββββ | 9494/10682 [1:20:47<09:46, 2.02it/s]
|
893 |
89%|βββββββββ | 9495/10682 [1:20:48<09:46, 2.02it/s]
|
894 |
89%|βββββββββ | 9496/10682 [1:20:48<09:46, 2.02it/s]
|
895 |
89%|βββββββββ | 9497/10682 [1:20:49<09:45, 2.02it/s]
|
896 |
89%|βββββββββ | 9498/10682 [1:20:49<09:45, 2.02it/s]
|
897 |
89%|βββββββββ | 9499/10682 [1:20:50<09:44, 2.02it/s]
|
898 |
89%|βββββββββ | 9500/10682 [1:20:50<09:43, 2.02it/s]
|
899 |
|
|
|
900 |
89%|βββββββββ | 9500/10682 [1:20:50<09:43, 2.02it/s]
|
901 |
89%|βββββββββ | 9501/10682 [1:20:51<09:44, 2.02it/s]
|
902 |
89%|βββββββββ | 9502/10682 [1:20:51<09:43, 2.02it/s]
|
903 |
89%|βββββββββ | 9503/10682 [1:20:52<09:43, 2.02it/s]
|
904 |
89%|βββββββββ | 9504/10682 [1:20:52<09:42, 2.02it/s]
|
905 |
89%|βββββββββ | 9505/10682 [1:20:53<09:42, 2.02it/s]
|
906 |
89%|βββββββββ | 9506/10682 [1:20:53<09:41, 2.02it/s]
|
907 |
89%|βββββββββ | 9507/10682 [1:20:54<09:40, 2.02it/s]
|
908 |
89%|βββββββββ | 9508/10682 [1:20:54<09:39, 2.03it/s]
|
909 |
89%|βββββββββ | 9509/10682 [1:20:55<09:39, 2.02it/s]
|
910 |
89%|βββββββββ | 9510/10682 [1:20:55<09:38, 2.03it/s]
|
911 |
89%|βββββββββ | 9511/10682 [1:20:56<09:38, 2.02it/s]
|
912 |
89%|βββββββββ | 9512/10682 [1:20:56<09:37, 2.02it/s]
|
913 |
89%|βββββββββ | 9513/10682 [1:20:57<09:37, 2.02it/s]
|
914 |
89%|βββββββββ | 9514/10682 [1:20:57<09:37, 2.02it/s]
|
915 |
89%|βββββββββ | 9515/10682 [1:20:58<09:36, 2.02it/s]
|
916 |
89%|βββββββββ | 9516/10682 [1:20:58<09:36, 2.02it/s]
|
917 |
89%|βββββββββ | 9517/10682 [1:20:59<09:35, 2.02it/s]
|
918 |
89%|βββββββββ | 9518/10682 [1:20:59<09:35, 2.02it/s]
|
919 |
89%|βββββββββ | 9519/10682 [1:21:00<09:34, 2.03it/s]
|
920 |
89%|βββββββββ | 9520/10682 [1:21:00<09:34, 2.02it/s]
|
921 |
89%|βββββββββ | 9521/10682 [1:21:01<09:33, 2.02it/s]
|
922 |
89%|βββββββββ | 9522/10682 [1:21:01<09:33, 2.02it/s]
|
923 |
89%|βββββββββ | 9523/10682 [1:21:02<09:32, 2.02it/s]
|
924 |
89%|βββββββββ | 9524/10682 [1:21:02<09:32, 2.02it/s]
|
925 |
89%|βββββββββ | 9525/10682 [1:21:03<09:31, 2.02it/s]
|
926 |
|
|
|
927 |
89%|βββββββββ | 9525/10682 [1:21:03<09:31, 2.02it/s]
|
928 |
89%|βββββββββ | 9526/10682 [1:21:03<09:32, 2.02it/s]
|
929 |
89%|βββββββββ | 9527/10682 [1:21:03<09:31, 2.02it/s]
|
930 |
89%|βββββββββ | 9528/10682 [1:21:04<09:30, 2.02it/s]
|
931 |
89%|βββββββββ | 9529/10682 [1:21:04<09:29, 2.02it/s]
|
932 |
89%|βββββββββ | 9530/10682 [1:21:05<09:28, 2.02it/s]
|
933 |
89%|βββββββββ | 9531/10682 [1:21:05<09:28, 2.02it/s]
|
934 |
89%|βββββββββ | 9532/10682 [1:21:06<09:27, 2.03it/s]
|
935 |
89%|βββββββββ | 9533/10682 [1:21:06<09:27, 2.02it/s]
|
936 |
89%|βββββββββ | 9534/10682 [1:21:07<09:26, 2.02it/s]
|
937 |
89%|βββββββββ | 9535/10682 [1:21:07<09:26, 2.02it/s]
|
938 |
89%|βββββββββ | 9536/10682 [1:21:08<09:26, 2.02it/s]
|
939 |
89%|βββββββββ | 9537/10682 [1:21:08<09:26, 2.02it/s]
|
940 |
89%|βββββββββ | 9538/10682 [1:21:09<09:25, 2.02it/s]
|
941 |
89%|βββββββββ | 9539/10682 [1:21:09<09:25, 2.02it/s]
|
942 |
89%|βββββββββ | 9540/10682 [1:21:10<09:24, 2.02it/s]
|
943 |
89%|βββββββββ | 9541/10682 [1:21:10<09:24, 2.02it/s]
|
944 |
89%|βββββββββ | 9542/10682 [1:21:11<09:23, 2.02it/s]
|
945 |
89%|βββββββββ | 9543/10682 [1:21:11<09:23, 2.02it/s]
|
946 |
89%|βββββββββ | 9544/10682 [1:21:12<09:22, 2.02it/s]
|
947 |
89%|βββββββββ | 9545/10682 [1:21:12<09:21, 2.02it/s]
|
948 |
89%|βββββββββ | 9546/10682 [1:21:13<09:21, 2.02it/s]
|
949 |
89%|βββββββββ | 9547/10682 [1:21:13<09:20, 2.02it/s]
|
950 |
89%|βββββββββ | 9548/10682 [1:21:14<09:20, 2.02it/s]
|
951 |
89%|βββββββββ | 9549/10682 [1:21:14<09:20, 2.02it/s]
|
952 |
89%|βββββββββ | 9550/10682 [1:21:15<09:19, 2.02it/s]
|
953 |
|
|
|
954 |
89%|βββββββββ | 9550/10682 [1:21:15<09:19, 2.02it/s]
|
955 |
89%|βββββββββ | 9551/10682 [1:21:15<09:19, 2.02it/s]
|
956 |
89%|βββββββββ | 9552/10682 [1:21:16<09:18, 2.02it/s]
|
957 |
89%|βββββββββ | 9553/10682 [1:21:16<09:18, 2.02it/s]
|
958 |
89%|βββββββββ | 9554/10682 [1:21:17<09:17, 2.02it/s]
|
959 |
89%|βββββββββ | 9555/10682 [1:21:17<09:17, 2.02it/s]
|
960 |
89%|βββββββββ | 9556/10682 [1:21:18<09:16, 2.02it/s]
|
961 |
89%|βββββββββ | 9557/10682 [1:21:18<09:15, 2.03it/s]
|
962 |
89%|βββββββββ | 9558/10682 [1:21:19<09:15, 2.02it/s]
|
963 |
89%|βββββββββ | 9559/10682 [1:21:19<09:14, 2.03it/s]
|
964 |
89%|βββββββββ | 9560/10682 [1:21:20<09:13, 2.03it/s]
|
965 |
90%|βββββββββ | 9561/10682 [1:21:20<09:13, 2.02it/s]
|
966 |
90%|βββββββββ | 9562/10682 [1:21:21<09:12, 2.03it/s]
|
967 |
90%|βββββββββ | 9563/10682 [1:21:21<09:13, 2.02it/s]
|
968 |
90%|βββββββββ | 9564/10682 [1:21:22<09:12, 2.02it/s]
|
969 |
90%|βββββββββ | 9565/10682 [1:21:22<09:12, 2.02it/s]
|
970 |
90%|βββββββββ | 9566/10682 [1:21:23<09:11, 2.02it/s]
|
971 |
90%|βββββββββ | 9567/10682 [1:21:23<09:11, 2.02it/s]
|
972 |
90%|βββββββββ | 9568/10682 [1:21:24<09:10, 2.02it/s]
|
973 |
90%|βββββββββ | 9569/10682 [1:21:24<09:57, 1.86it/s]
|
974 |
90%|βββββββββ | 9570/10682 [1:21:25<09:43, 1.91it/s]
|
975 |
90%|βββββββββ | 9571/10682 [1:21:25<09:32, 1.94it/s]
|
976 |
90%|βββββββββ | 9572/10682 [1:21:26<09:25, 1.96it/s]
|
977 |
90%|βββββββββ | 9573/10682 [1:21:26<09:20, 1.98it/s]
|
978 |
90%|βββββββββ | 9574/10682 [1:21:27<09:15, 1.99it/s]
|
979 |
90%|βββββββββ | 9575/10682 [1:21:27<09:13, 2.00it/s]
|
980 |
|
|
|
981 |
90%|βββββββββ | 9575/10682 [1:21:27<09:13, 2.00it/s]
|
982 |
90%|βββββββββ | 9576/10682 [1:21:28<09:13, 2.00it/s]
|
983 |
90%|βββββββββ | 9577/10682 [1:21:28<09:10, 2.01it/s]
|
984 |
90%|βββββββββ | 9578/10682 [1:21:29<09:08, 2.01it/s]
|
985 |
90%|βββββββββ | 9579/10682 [1:21:29<09:07, 2.02it/s]
|
986 |
90%|βββββββββ | 9580/10682 [1:21:30<09:06, 2.02it/s]
|
987 |
90%|βββββββββ | 9581/10682 [1:21:30<09:05, 2.02it/s]
|
988 |
90%|βββββββββ | 9582/10682 [1:21:31<09:05, 2.02it/s]
|
989 |
90%|βββββββββ | 9583/10682 [1:21:31<09:03, 2.02it/s]
|
990 |
90%|βββββββββ | 9584/10682 [1:21:32<09:03, 2.02it/s]
|
991 |
90%|βββββββββ | 9585/10682 [1:21:32<09:01, 2.02it/s]
|
992 |
90%|βββββββββ | 9586/10682 [1:21:33<09:01, 2.02it/s]
|
993 |
90%|βββββββββ | 9587/10682 [1:21:33<09:00, 2.03it/s]
|
994 |
90%|βββββββββ | 9588/10682 [1:21:34<09:00, 2.02it/s]
|
995 |
90%|βββββββββ | 9589/10682 [1:21:34<08:59, 2.02it/s]
|
996 |
90%|βββββββββ | 9590/10682 [1:21:35<08:59, 2.02it/s]
|
997 |
90%|βββββββββ | 9591/10682 [1:21:35<08:58, 2.03it/s]
|
998 |
90%|βββββββββ | 9592/10682 [1:21:36<08:58, 2.03it/s]
|
999 |
90%|βββββββββ | 9593/10682 [1:21:36<08:57, 2.03it/s]
|
1000 |
90%|βββββββββ | 9594/10682 [1:21:37<08:57, 2.03it/s]
|
1001 |
90%|βββββββββ | 9595/10682 [1:21:37<08:57, 2.02it/s]
|
1002 |
90%|βββββββββ | 9596/10682 [1:21:38<08:56, 2.02it/s]
|
1003 |
90%|βββββββββ | 9597/10682 [1:21:38<08:56, 2.02it/s]
|
1004 |
90%|βββββββββ | 9598/10682 [1:21:39<08:55, 2.02it/s]
|
1005 |
90%|βββββββββ | 9599/10682 [1:21:39<08:55, 2.02it/s]
|
1006 |
90%|βββββββββ | 9600/10682 [1:21:40<08:55, 2.02it/s]
|
1007 |
|
|
|
1008 |
90%|βββββββββ | 9600/10682 [1:21:40<08:55, 2.02it/s]
|
1009 |
90%|βββββββββ | 9601/10682 [1:21:40<08:54, 2.02it/s]
|
1010 |
90%|βββββββββ | 9602/10682 [1:21:41<08:54, 2.02it/s]
|
1011 |
90%|βββββββββ | 9603/10682 [1:21:41<08:53, 2.02it/s]
|
1012 |
90%|βββββββββ | 9604/10682 [1:21:42<08:52, 2.02it/s]
|
1013 |
90%|βββββββββ | 9605/10682 [1:21:42<08:52, 2.02it/s]
|
1014 |
90%|βββββββββ | 9606/10682 [1:21:43<08:52, 2.02it/s]
|
1015 |
90%|βββββββββ | 9607/10682 [1:21:43<08:51, 2.02it/s]
|
1016 |
90%|βββββββββ | 9608/10682 [1:21:44<08:51, 2.02it/s]
|
1017 |
90%|βββββββββ | 9609/10682 [1:21:44<08:50, 2.02it/s]
|
1018 |
90%|βββββββββ | 9610/10682 [1:21:45<08:49, 2.03it/s]
|
1019 |
90%|βββββββββ | 9611/10682 [1:21:45<08:49, 2.02it/s]
|
1020 |
90%|βββββββββ | 9612/10682 [1:21:46<08:48, 2.03it/s]
|
1021 |
90%|βββββββββ | 9613/10682 [1:21:46<08:48, 2.02it/s]
|
1022 |
90%|βββββββββ | 9614/10682 [1:21:47<08:46, 2.03it/s]
|
1023 |
90%|βββββββββ | 9615/10682 [1:21:47<08:46, 2.02it/s]
|
1024 |
90%|βββββββββ | 9616/10682 [1:21:48<08:46, 2.03it/s]
|
1025 |
90%|βββββββββ | 9617/10682 [1:21:48<08:46, 2.02it/s]
|
1026 |
90%|βββββββββ | 9618/10682 [1:21:49<08:45, 2.02it/s]
|
1027 |
90%|βββββββββ | 9619/10682 [1:21:49<08:45, 2.02it/s]
|
1028 |
90%|βββββββββ | 9620/10682 [1:21:50<08:44, 2.02it/s]
|
1029 |
90%|βββββββββ | 9621/10682 [1:21:50<08:44, 2.02it/s]
|
1030 |
90%|βββββββββ | 9622/10682 [1:21:51<08:43, 2.02it/s]
|
1031 |
90%|βββββββββ | 9623/10682 [1:21:51<08:43, 2.02it/s]
|
1032 |
90%|βββββββββ | 9624/10682 [1:21:52<08:43, 2.02it/s]
|
1033 |
90%|βββββββββ | 9625/10682 [1:21:52<08:42, 2.02it/s]{'loss': 2.7489, 'grad_norm': 0.2770747244358063, 'learning_rate': 2.9535819277506203e-05, 'epoch': 12.61}
|
1034 |
|
|
|
1035 |
90%|βββββββββ | 9625/10682 [1:21:52<08:42, 2.02it/s]
|
1036 |
90%|βββββββββ | 9626/10682 [1:21:53<08:42, 2.02it/s]
|
1037 |
90%|βββββββββ | 9627/10682 [1:21:53<08:42, 2.02it/s]
|
1038 |
90%|βββββββββ | 9628/10682 [1:21:54<08:41, 2.02it/s]
|
1039 |
90%|βββββββββ | 9629/10682 [1:21:54<08:40, 2.02it/s]
|
1040 |
90%|βββββββββ | 9630/10682 [1:21:55<08:39, 2.02it/s]
|
1041 |
90%|βββββββββ | 9631/10682 [1:21:55<08:39, 2.02it/s]
|
1042 |
90%|βββββββββ | 9632/10682 [1:21:56<08:38, 2.02it/s]
|
1043 |
90%|βββββββββ | 9633/10682 [1:21:56<08:38, 2.02it/s]
|
1044 |
90%|βββββββββ | 9634/10682 [1:21:57<08:37, 2.02it/s]
|
1045 |
90%|βββββββββ | 9635/10682 [1:21:57<08:37, 2.02it/s]
|
1046 |
90%|βββββββββ | 9636/10682 [1:21:58<08:36, 2.02it/s]
|
1047 |
90%|βββββββββ | 9637/10682 [1:21:58<08:36, 2.02it/s]
|
1048 |
90%|βββββββββ | 9638/10682 [1:21:59<08:35, 2.02it/s]
|
1049 |
90%|βββββββββ | 9639/10682 [1:21:59<08:35, 2.02it/s]
|
1050 |
90%|βββββββββ | 9640/10682 [1:21:59<08:34, 2.02it/s]
|
1051 |
90%|βββββββββ | 9641/10682 [1:22:00<08:34, 2.02it/s]
|
1052 |
90%|βββββββββ | 9642/10682 [1:22:00<08:34, 2.02it/s]
|
1053 |
90%|βββββββββ | 9643/10682 [1:22:01<08:33, 2.02it/s]
|
1054 |
90%|βββββββββ | 9644/10682 [1:22:01<08:33, 2.02it/s]
|
1055 |
90%|βββββββββ | 9645/10682 [1:22:02<08:32, 2.02it/s]
|
1056 |
90%|βββββββββ | 9646/10682 [1:22:02<08:32, 2.02it/s]
|
1057 |
90%|βββββββββ | 9647/10682 [1:22:03<08:31, 2.02it/s]
|
1058 |
90%|βββββββββ | 9648/10682 [1:22:03<08:31, 2.02it/s]
|
1059 |
90%|βββββββββ | 9649/10682 [1:22:04<08:30, 2.02it/s]
|
1060 |
90%|βββββββββ | 9650/10682 [1:22:04<08:29, 2.02it/s]{'loss': 2.7507, 'grad_norm': 0.2786267101764679, 'learning_rate': 2.8168304138088295e-05, 'epoch': 12.64}
|
1061 |
|
|
|
1062 |
90%|βββββββββ | 9650/10682 [1:22:04<08:29, 2.02it/s]
|
1063 |
90%|βββββββββ | 9651/10682 [1:22:05<08:29, 2.02it/s]
|
1064 |
90%|βββββββββ | 9652/10682 [1:22:05<08:29, 2.02it/s]
|
1065 |
90%|βββββββββ | 9653/10682 [1:22:06<08:29, 2.02it/s]
|
1066 |
90%|βββββββββ | 9654/10682 [1:22:06<08:28, 2.02it/s]
|
1067 |
90%|βββββββββ | 9655/10682 [1:22:07<08:28, 2.02it/s]
|
1068 |
90%|βββββββββ | 9656/10682 [1:22:07<08:27, 2.02it/s]
|
1069 |
90%|βββββββββ | 9657/10682 [1:22:08<08:27, 2.02it/s]
|
1070 |
90%|βββββββββ | 9658/10682 [1:22:08<08:26, 2.02it/s]
|
1071 |
90%|βββββββββ | 9659/10682 [1:22:09<08:26, 2.02it/s]
|
1072 |
90%|βββββββββ | 9660/10682 [1:22:09<08:25, 2.02it/s]
|
1073 |
90%|βββββββββ | 9661/10682 [1:22:10<08:25, 2.02it/s]
|
1074 |
90%|βββββββββ | 9662/10682 [1:22:10<08:24, 2.02it/s]
|
1075 |
90%|βββββββββ | 9663/10682 [1:22:11<08:24, 2.02it/s]
|
1076 |
90%|βββββββββ | 9664/10682 [1:22:11<08:23, 2.02it/s]
|
1077 |
90%|βββββββββ | 9665/10682 [1:22:12<08:23, 2.02it/s]
|
1078 |
90%|βββββββββ | 9666/10682 [1:22:12<08:23, 2.02it/s]
|
1079 |
90%|βββββββββ | 9667/10682 [1:22:13<08:22, 2.02it/s]
|
1080 |
91%|βββββββββ | 9668/10682 [1:22:13<08:21, 2.02it/s]
|
1081 |
91%|βββββββββ | 9669/10682 [1:22:14<08:20, 2.02it/s]
|
1082 |
91%|βββββββββ | 9670/10682 [1:22:14<08:19, 2.02it/s]
|
1083 |
91%|βββββββββ | 9671/10682 [1:22:15<08:19, 2.02it/s]
|
1084 |
91%|βββββββββ | 9672/10682 [1:22:15<08:18, 2.03it/s]
|
1085 |
91%|βββββββββ | 9673/10682 [1:22:16<08:18, 2.02it/s]
|
1086 |
91%|βββββββββ | 9674/10682 [1:22:16<08:17, 2.03it/s]
|
1087 |
91%|βββββββββ | 9675/10682 [1:22:17<08:17, 2.02it/s]
|
1088 |
{'loss': 2.7574, 'grad_norm': 0.27770131826400757, 'learning_rate': 2.6832284358471516e-05, 'epoch': 12.67}
|
|
|
1089 |
91%|βββββββββ | 9675/10682 [1:22:17<08:17, 2.02it/s]
|
1090 |
91%|βββββββββ | 9676/10682 [1:22:17<08:16, 2.02it/s]
|
1091 |
91%|βββββββββ | 9677/10682 [1:22:18<08:16, 2.02it/s]
|
1092 |
91%|βββββββββ | 9678/10682 [1:22:18<08:16, 2.02it/s]
|
1093 |
91%|βββββββββ | 9679/10682 [1:22:19<08:15, 2.02it/s]
|
1094 |
91%|βββββββββ | 9680/10682 [1:22:19<08:15, 2.02it/s]
|
1095 |
91%|βββββββββ | 9681/10682 [1:22:20<08:14, 2.02it/s]
|
1096 |
91%|βββββββββ | 9682/10682 [1:22:20<08:14, 2.02it/s]
|
1097 |
91%|βββββββββ | 9683/10682 [1:22:21<08:13, 2.02it/s]
|
1098 |
91%|βββββββββ | 9684/10682 [1:22:21<08:13, 2.02it/s]
|
1099 |
91%|βββββββββ | 9685/10682 [1:22:22<08:12, 2.02it/s]
|
1100 |
91%|βββββββββ | 9686/10682 [1:22:22<08:12, 2.02it/s]
|
1101 |
91%|βββββββββ | 9687/10682 [1:22:23<08:11, 2.03it/s]
|
1102 |
91%|βββββββββ | 9688/10682 [1:22:23<08:11, 2.02it/s]
|
1103 |
91%|βββββββββ | 9689/10682 [1:22:24<08:10, 2.02it/s]
|
1104 |
91%|βββββββββ | 9690/10682 [1:22:24<08:10, 2.02it/s]
|
1105 |
91%|βββββββββ | 9691/10682 [1:22:25<08:09, 2.02it/s]
|
1106 |
91%|βββββββββ | 9692/10682 [1:22:25<08:09, 2.02it/s]
|
1107 |
91%|βββββββββ | 9693/10682 [1:22:26<08:08, 2.02it/s]
|
1108 |
91%|βββββββββ | 9694/10682 [1:22:26<08:08, 2.02it/s]
|
1109 |
91%|βββββββββ | 9695/10682 [1:22:27<08:08, 2.02it/s]
|
1110 |
91%|βββββββββ | 9696/10682 [1:22:27<08:07, 2.02it/s]
|
1111 |
91%|βββββββββ | 9697/10682 [1:22:28<08:06, 2.02it/s]
|
1112 |
91%|βββββββββ | 9698/10682 [1:22:28<08:06, 2.02it/s]
|
1113 |
91%|βββββββββ | 9699/10682 [1:22:29<08:05, 2.02it/s]
|
1114 |
91%|βββββββββ | 9700/10682 [1:22:29<08:05, 2.02it/s]
|
1115 |
{'loss': 2.749, 'grad_norm': 0.27603113651275635, 'learning_rate': 2.5527849119658387e-05, 'epoch': 12.7}
|
|
|
1116 |
91%|βββββββββ | 9700/10682 [1:22:29<08:05, 2.02it/s]
|
1117 |
91%|βββββββββ | 9701/10682 [1:22:30<08:05, 2.02it/s]
|
1118 |
91%|βββββββββ | 9702/10682 [1:22:30<08:04, 2.02it/s]
|
1119 |
91%|βββββββββ | 9703/10682 [1:22:31<08:04, 2.02it/s]
|
1120 |
91%|βββββββββ | 9704/10682 [1:22:31<08:03, 2.02it/s]
|
1121 |
91%|βββββββββ | 9705/10682 [1:22:32<08:03, 2.02it/s]
|
1122 |
91%|βββββββββ | 9706/10682 [1:22:32<08:02, 2.02it/s]
|
1123 |
91%|βββββββββ | 9707/10682 [1:22:33<08:02, 2.02it/s]
|
1124 |
91%|βββββββββ | 9708/10682 [1:22:33<08:01, 2.02it/s]
|
1125 |
91%|βββββββββ | 9709/10682 [1:22:34<08:01, 2.02it/s]
|
1126 |
91%|βββββββοΏ½οΏ½οΏ½β | 9710/10682 [1:22:34<08:00, 2.02it/s]
|
1127 |
91%|βββββββββ | 9711/10682 [1:22:35<08:00, 2.02it/s]
|
1128 |
91%|βββββββββ | 9712/10682 [1:22:35<07:59, 2.02it/s]
|
1129 |
91%|βββββββββ | 9713/10682 [1:22:36<07:58, 2.02it/s]
|
1130 |
91%|βββββββββ | 9714/10682 [1:22:36<07:58, 2.02it/s]
|
1131 |
91%|βββββββββ | 9715/10682 [1:22:37<07:57, 2.02it/s]
|
1132 |
91%|βββββββββ | 9716/10682 [1:22:37<07:57, 2.02it/s]
|
1133 |
91%|βββββββββ | 9717/10682 [1:22:38<07:56, 2.02it/s]
|
1134 |
91%|βββββββββ | 9718/10682 [1:22:38<07:56, 2.02it/s]
|
1135 |
91%|βββββββββ | 9719/10682 [1:22:39<07:55, 2.02it/s]
|
1136 |
91%|βββββββββ | 9720/10682 [1:22:39<07:54, 2.03it/s]
|
1137 |
91%|βββββββββ | 9721/10682 [1:22:40<07:54, 2.03it/s]
|
1138 |
91%|βββββββββ | 9722/10682 [1:22:40<07:54, 2.02it/s]
|
1139 |
91%|βββββββββ | 9723/10682 [1:22:41<07:53, 2.02it/s]
|
1140 |
91%|βββββββββ | 9724/10682 [1:22:41<07:53, 2.02it/s]
|
1141 |
91%|βββββββββ | 9725/10682 [1:22:42<07:52, 2.03it/s]
|
1142 |
{'loss': 2.756, 'grad_norm': 0.27851659059524536, 'learning_rate': 2.4255085494343522e-05, 'epoch': 12.74}
|
|
|
1143 |
91%|βββββββββ | 9725/10682 [1:22:42<07:52, 2.03it/s]
|
1144 |
91%|βββββββββ | 9726/10682 [1:22:42<07:53, 2.02it/s]
|
1145 |
91%|βββββββββ | 9727/10682 [1:22:42<07:51, 2.02it/s]
|
1146 |
91%|βββββββββ | 9728/10682 [1:22:43<07:51, 2.02it/s]
|
1147 |
91%|βββββββββ | 9729/10682 [1:22:43<07:51, 2.02it/s]
|
1148 |
91%|βββββββββ | 9730/10682 [1:22:44<07:50, 2.02it/s]
|
1149 |
91%|βββββββββ | 9731/10682 [1:22:44<07:50, 2.02it/s]
|
1150 |
91%|βββββββββ | 9732/10682 [1:22:45<07:49, 2.02it/s]
|
1151 |
91%|βββββββββ | 9733/10682 [1:22:45<07:48, 2.02it/s]
|
1152 |
91%|βββββββββ | 9734/10682 [1:22:46<07:48, 2.02it/s]
|
1153 |
91%|βββββββββ | 9735/10682 [1:22:46<07:48, 2.02it/s]
|
1154 |
91%|βββββββββ | 9736/10682 [1:22:47<07:47, 2.02it/s]
|
1155 |
91%|βββββββββ | 9737/10682 [1:22:47<07:47, 2.02it/s]
|
1156 |
91%|βββββββββ | 9738/10682 [1:22:48<07:46, 2.02it/s]
|
1157 |
91%|βββββββββ | 9739/10682 [1:22:48<07:46, 2.02it/s]
|
1158 |
91%|βββββββββ | 9740/10682 [1:22:49<07:45, 2.02it/s]
|
1159 |
91%|βββββββββ | 9741/10682 [1:22:49<07:45, 2.02it/s]
|
1160 |
91%|βββββββββ | 9742/10682 [1:22:50<07:44, 2.02it/s]
|
1161 |
91%|βββββββββ | 9743/10682 [1:22:50<07:44, 2.02it/s]
|
1162 |
91%|βββββββββ | 9744/10682 [1:22:51<07:44, 2.02it/s]
|
1163 |
91%|βββββββββ | 9745/10682 [1:22:51<07:43, 2.02it/s]
|
1164 |
91%|βββββββββ | 9746/10682 [1:22:52<07:42, 2.02it/s]
|
1165 |
91%|βββββββββ | 9747/10682 [1:22:52<07:42, 2.02it/s]
|
1166 |
91%|ββββββββββ| 9748/10682 [1:22:53<07:42, 2.02it/s]
|
1167 |
91%|ββββββββββ| 9749/10682 [1:22:53<07:41, 2.02it/s]
|
1168 |
91%|ββββββββββ| 9750/10682 [1:22:54<07:40, 2.02it/s]
|
1169 |
{'loss': 2.7534, 'grad_norm': 0.2752934694290161, 'learning_rate': 2.301407844110154e-05, 'epoch': 12.77}
|
|
|
1170 |
91%|ββββββββββ| 9750/10682 [1:22:54<07:40, 2.02it/s]
|
1171 |
91%|ββββββββββ| 9751/10682 [1:22:54<07:40, 2.02it/s]
|
1172 |
91%|ββββββββββ| 9752/10682 [1:22:55<07:39, 2.02it/s]
|
1173 |
91%|ββββββββββ| 9753/10682 [1:22:55<07:39, 2.02it/s]
|
1174 |
91%|ββββββββββ| 9754/10682 [1:22:56<07:39, 2.02it/s]
|
1175 |
91%|ββββββββββ| 9755/10682 [1:22:56<07:38, 2.02it/s]
|
1176 |
91%|ββββββββββ| 9756/10682 [1:22:57<07:38, 2.02it/s]
|
1177 |
91%|ββββββββββ| 9757/10682 [1:22:57<07:37, 2.02it/s]
|
1178 |
91%|ββββββββββ| 9758/10682 [1:22:58<07:37, 2.02it/s]
|
1179 |
91%|ββββββββββ| 9759/10682 [1:22:58<07:36, 2.02it/s]
|
1180 |
91%|ββββββββββ| 9760/10682 [1:22:59<07:36, 2.02it/s]
|
1181 |
91%|ββββββββββ| 9761/10682 [1:22:59<07:35, 2.02it/s]
|
1182 |
91%|ββββββββββ| 9762/10682 [1:23:00<07:34, 2.02it/s]
|
1183 |
91%|ββββββββββ| 9763/10682 [1:23:00<07:34, 2.02it/s]
|
1184 |
91%|ββββββββββ| 9764/10682 [1:23:01<07:33, 2.02it/s]
|
1185 |
91%|ββββββββββ| 9765/10682 [1:23:01<07:33, 2.02it/s]
|
1186 |
91%|ββββββββββ| 9766/10682 [1:23:02<07:32, 2.02it/s]
|
1187 |
91%|ββββββββββ| 9767/10682 [1:23:02<07:32, 2.02it/s]
|
1188 |
91%|ββββββββββ| 9768/10682 [1:23:03<07:32, 2.02it/s]
|
1189 |
91%|ββββββββββ| 9769/10682 [1:23:03<07:31, 2.02it/s]
|
1190 |
91%|ββββββββββ| 9770/10682 [1:23:04<07:31, 2.02it/s]
|
1191 |
91%|ββββββββββ| 9771/10682 [1:23:04<07:30, 2.02it/s]
|
1192 |
91%|ββββββββββ| 9772/10682 [1:23:05<07:30, 2.02it/s]
|
1193 |
91%|ββββββββββ| 9773/10682 [1:23:05<07:29, 2.02it/s]
|
1194 |
91%|ββββββββββ| 9774/10682 [1:23:06<07:28, 2.02it/s]
|
1195 |
92%|ββββββββββ| 9775/10682 [1:23:06<07:28, 2.02it/s]
|
1196 |
{'loss': 2.7569, 'grad_norm': 0.27959272265434265, 'learning_rate': 2.1804910798715826e-05, 'epoch': 12.8}
|
|
|
1197 |
92%|ββββββββββ| 9775/10682 [1:23:06<07:28, 2.02it/s]
|
1198 |
92%|ββββββββββ| 9776/10682 [1:23:07<07:28, 2.02it/s]
|
1199 |
92%|ββββββββββ| 9777/10682 [1:23:07<07:27, 2.02it/s]
|
1200 |
92%|ββββββββββ| 9778/10682 [1:23:08<07:27, 2.02it/s]
|
1201 |
92%|ββββββββββ| 9779/10682 [1:23:08<07:26, 2.02it/s]
|
1202 |
92%|ββββββββββ| 9780/10682 [1:23:09<07:25, 2.02it/s]
|
1203 |
92%|ββββββββββ| 9781/10682 [1:23:09<07:25, 2.02it/s]
|
1204 |
92%|ββββββββββ| 9782/10682 [1:23:10<07:25, 2.02it/s]
|
1205 |
92%|ββββββββββ| 9783/10682 [1:23:10<07:24, 2.02it/s]
|
1206 |
92%|ββββββββββ| 9784/10682 [1:23:11<07:24, 2.02it/s]
|
1207 |
92%|ββββββββββ| 9785/10682 [1:23:11<07:23, 2.02it/s]
|
1208 |
92%|ββββββββββ| 9786/10682 [1:23:12<07:23, 2.02it/s]
|
1209 |
92%|ββββββββββ| 9787/10682 [1:23:12<07:22, 2.02it/s]
|
1210 |
92%|ββββββββββ| 9788/10682 [1:23:13<07:21, 2.02it/s]
|
1211 |
92%|ββββββββββ| 9789/10682 [1:23:13<07:21, 2.02it/s]
|
1212 |
92%|ββββββββββ| 9790/10682 [1:23:14<07:20, 2.03it/s]
|
1213 |
92%|ββββββββββ| 9791/10682 [1:23:14<07:20, 2.02it/s]
|
1214 |
92%|ββββββββββ| 9792/10682 [1:23:15<07:19, 2.02it/s]
|
1215 |
92%|ββββββββββ| 9793/10682 [1:23:15<07:19, 2.02it/s]
|
1216 |
92%|ββββββββββ| 9794/10682 [1:23:16<07:18, 2.02it/s]
|
1217 |
92%|ββββββββββ| 9795/10682 [1:23:16<07:17, 2.03it/s]
|
1218 |
92%|ββββββββββ| 9796/10682 [1:23:17<07:17, 2.02it/s]
|
1219 |
92%|ββββββββββ| 9797/10682 [1:23:17<07:16, 2.03it/s]
|
1220 |
92%|ββββββββββ| 9798/10682 [1:23:18<07:16, 2.03it/s]
|
1221 |
92%|ββββββββββ| 9799/10682 [1:23:18<07:15, 2.03it/s]
|
1222 |
92%|ββββββββββ| 9800/10682 [1:23:19<07:15, 2.02it/s]
|
1223 |
|
|
|
1224 |
92%|ββββββββββ| 9800/10682 [1:23:19<07:15, 2.02it/s]
|
1225 |
92%|ββββββββββ| 9801/10682 [1:23:19<07:17, 2.01it/s]
|
1226 |
92%|ββββββββββ| 9802/10682 [1:23:20<07:16, 2.02it/s]
|
1227 |
92%|ββββββββββ| 9803/10682 [1:23:20<07:15, 2.02it/s]
|
1228 |
92%|ββββββββββ| 9804/10682 [1:23:21<07:14, 2.02it/s]
|
1229 |
92%|ββββββββββ| 9805/10682 [1:23:21<07:14, 2.02it/s]
|
1230 |
92%|ββββββββββ| 9806/10682 [1:23:22<07:13, 2.02it/s]
|
1231 |
92%|ββββββββββ| 9807/10682 [1:23:22<07:12, 2.02it/s]
|
1232 |
92%|ββββββββββ| 9808/10682 [1:23:23<07:12, 2.02it/s]
|
1233 |
92%|ββββββββββ| 9809/10682 [1:23:23<07:11, 2.02it/s]
|
1234 |
92%|ββββββββββ| 9810/10682 [1:23:24<07:11, 2.02it/s]
|
1235 |
92%|ββββββββββ| 9811/10682 [1:23:24<07:11, 2.02it/s]
|
1236 |
92%|ββββββββββ| 9812/10682 [1:23:25<07:10, 2.02it/s]
|
1237 |
92%|ββββββββββ| 9813/10682 [1:23:25<07:09, 2.02it/s]
|
1238 |
92%|ββββββββββ| 9814/10682 [1:23:26<07:09, 2.02it/s]
|
1239 |
92%|ββββββββββ| 9815/10682 [1:23:26<07:08, 2.02it/s]
|
1240 |
92%|ββββββββββ| 9816/10682 [1:23:27<07:08, 2.02it/s]
|
1241 |
92%|ββββββββββ| 9817/10682 [1:23:27<07:07, 2.02it/s]
|
1242 |
92%|ββββββββββ| 9818/10682 [1:23:28<07:07, 2.02it/s]
|
1243 |
92%|ββββββββββ| 9819/10682 [1:23:28<07:06, 2.02it/s]
|
1244 |
92%|ββββββββββ| 9820/10682 [1:23:28<07:06, 2.02it/s]
|
1245 |
92%|ββββββββββ| 9821/10682 [1:23:29<07:05, 2.02it/s]
|
1246 |
92%|ββββββββββ| 9822/10682 [1:23:29<07:05, 2.02it/s]
|
1247 |
92%|ββββββββββ| 9823/10682 [1:23:30<07:04, 2.02it/s]
|
1248 |
92%|ββββββββββ| 9824/10682 [1:23:30<07:04, 2.02it/s]
|
1249 |
92%|ββββββββββ| 9825/10682 [1:23:31<07:03, 2.02it/s]
|
1250 |
|
|
|
1251 |
92%|ββββββββββ| 9825/10682 [1:23:31<07:03, 2.02it/s]
|
1252 |
92%|ββββββββββ| 9826/10682 [1:23:31<07:03, 2.02it/s]
|
1253 |
92%|ββββββββββ| 9827/10682 [1:23:32<07:02, 2.02it/s]
|
1254 |
92%|ββββββββββ| 9828/10682 [1:23:32<07:02, 2.02it/s]
|
1255 |
92%|ββββββββββ| 9829/10682 [1:23:33<07:01, 2.02it/s]
|
1256 |
92%|ββββββββββ| 9830/10682 [1:23:33<07:01, 2.02it/s]
|
1257 |
92%|ββββββββββ| 9831/10682 [1:23:34<07:00, 2.02it/s]
|
1258 |
92%|ββββββββββ| 9832/10682 [1:23:34<06:59, 2.02it/s]
|
1259 |
92%|ββββββββββ| 9833/10682 [1:23:35<06:59, 2.02it/s]
|
1260 |
92%|ββββββββββ| 9834/10682 [1:23:35<06:58, 2.03it/s]
|
1261 |
92%|ββββββββββ| 9835/10682 [1:23:36<06:58, 2.02it/s]
|
1262 |
92%|ββββββββββ| 9836/10682 [1:23:36<06:57, 2.02it/s]
|
1263 |
92%|ββββββββββ| 9837/10682 [1:23:37<06:57, 2.02it/s]
|
1264 |
92%|ββββββββββ| 9838/10682 [1:23:37<06:57, 2.02it/s]
|
1265 |
92%|ββββββββββ| 9839/10682 [1:23:38<06:56, 2.02it/s]
|
1266 |
92%|ββββββββββ| 9840/10682 [1:23:38<06:56, 2.02it/s]
|
1267 |
92%|ββββββββββ| 9841/10682 [1:23:39<06:55, 2.02it/s]
|
1268 |
92%|ββββββββββ| 9842/10682 [1:23:39<06:55, 2.02it/s]
|
1269 |
92%|ββββββββββ| 9843/10682 [1:23:40<06:54, 2.02it/s]
|
1270 |
92%|ββββββββββ| 9844/10682 [1:23:40<06:54, 2.02it/s]
|
1271 |
92%|ββββββββββ| 9845/10682 [1:23:41<06:54, 2.02it/s]
|
1272 |
92%|ββββββββββ| 9846/10682 [1:23:41<06:54, 2.02it/s]
|
1273 |
92%|ββββββββββ| 9847/10682 [1:23:42<06:53, 2.02it/s]
|
1274 |
92%|ββββββββββ| 9848/10682 [1:23:42<06:52, 2.02it/s]
|
1275 |
92%|ββββββββββ| 9849/10682 [1:23:43<06:52, 2.02it/s]
|
1276 |
92%|ββββββββββ| 9850/10682 [1:23:43<06:51, 2.02it/s]{'loss': 2.7634, 'grad_norm': 0.27715760469436646, 'learning_rate': 1.8369240812535104e-05, 'epoch': 12.9}
|
1277 |
|
|
|
1278 |
92%|ββββββββββ| 9850/10682 [1:23:43<06:51, 2.02it/s]
|
1279 |
92%|ββββββββββ| 9851/10682 [1:23:44<06:51, 2.02it/s]
|
1280 |
92%|ββββββββββ| 9852/10682 [1:23:44<06:51, 2.02it/s]
|
1281 |
92%|ββββββββββ| 9853/10682 [1:23:45<06:50, 2.02it/s]
|
1282 |
92%|ββββββββββ| 9854/10682 [1:23:45<06:50, 2.02it/s]
|
1283 |
92%|ββββββββββ| 9855/10682 [1:23:46<06:49, 2.02it/s]
|
1284 |
92%|ββββββββββ| 9856/10682 [1:23:46<06:49, 2.02it/s]
|
1285 |
92%|ββββββββββ| 9857/10682 [1:23:47<06:47, 2.02it/s]
|
1286 |
92%|ββββββββββ| 9858/10682 [1:23:47<06:47, 2.02it/s]
|
1287 |
92%|ββββββββββ| 9859/10682 [1:23:48<06:46, 2.02it/s]
|
1288 |
92%|ββββββββββ| 9860/10682 [1:23:48<06:46, 2.02it/s]
|
1289 |
92%|ββββββββββ| 9861/10682 [1:23:49<06:45, 2.02it/s]
|
1290 |
92%|ββββββββββ| 9862/10682 [1:23:49<06:45, 2.02it/s]
|
1291 |
92%|ββββββββββ| 9863/10682 [1:23:50<06:44, 2.02it/s]
|
1292 |
92%|ββββββββββ| 9864/10682 [1:23:50<06:44, 2.02it/s]
|
1293 |
92%|ββββββββββ| 9865/10682 [1:23:51<06:43, 2.02it/s]
|
1294 |
92%|ββββββββββ| 9866/10682 [1:23:51<06:43, 2.02it/s]
|
1295 |
92%|ββββββββββ| 9867/10682 [1:23:52<06:43, 2.02it/s]
|
1296 |
92%|ββββββββββ| 9868/10682 [1:23:52<06:42, 2.02it/s]
|
1297 |
92%|ββββββββββ| 9869/10682 [1:23:53<06:42, 2.02it/s]
|
1298 |
92%|ββββββββββ| 9870/10682 [1:23:53<06:41, 2.02it/s]
|
1299 |
92%|ββββββββββ| 9871/10682 [1:23:54<06:40, 2.02it/s]
|
1300 |
92%|ββββββββββ| 9872/10682 [1:23:54<06:40, 2.02it/s]
|
1301 |
92%|ββββββββββ| 9873/10682 [1:23:55<06:39, 2.02it/s]
|
1302 |
92%|ββββββββββ| 9874/10682 [1:23:55<06:39, 2.02it/s]
|
1303 |
92%|ββββββββββ| 9875/10682 [1:23:56<06:38, 2.02it/s]
|
1304 |
|
|
|
1305 |
92%|ββββββββββ| 9875/10682 [1:23:56<06:38, 2.02it/s]
|
1306 |
92%|ββββββββββ| 9876/10682 [1:23:56<06:38, 2.02it/s]
|
1307 |
92%|ββββββββββ| 9877/10682 [1:23:57<06:38, 2.02it/s]
|
1308 |
92%|ββββββββββ| 9878/10682 [1:23:57<06:38, 2.02it/s]
|
1309 |
92%|ββββββββββ| 9879/10682 [1:23:58<06:37, 2.02it/s]
|
1310 |
92%|ββββββββββ| 9880/10682 [1:23:58<06:36, 2.02it/s]
|
1311 |
93%|ββββββββββ| 9881/10682 [1:23:59<06:35, 2.02it/s]
|
1312 |
93%|ββββββββββ| 9882/10682 [1:23:59<06:35, 2.02it/s]
|
1313 |
93%|ββββββββββ| 9883/10682 [1:24:00<06:35, 2.02it/s]
|
1314 |
93%|ββββββββββ| 9884/10682 [1:24:00<06:34, 2.02it/s]
|
1315 |
93%|ββββββββββ| 9885/10682 [1:24:01<06:34, 2.02it/s]
|
1316 |
93%|ββββββββββ| 9886/10682 [1:24:01<06:33, 2.02it/s]
|
1317 |
93%|ββββββββββ| 9887/10682 [1:24:02<06:32, 2.02it/s]
|
1318 |
93%|ββββββββββ| 9888/10682 [1:24:02<06:32, 2.02it/s]
|
1319 |
93%|ββββββββββ| 9889/10682 [1:24:03<06:32, 2.02it/s]
|
1320 |
93%|ββββββββββ| 9890/10682 [1:24:03<06:31, 2.02it/s]
|
1321 |
93%|ββββββββββ| 9891/10682 [1:24:04<06:31, 2.02it/s]
|
1322 |
93%|ββββββββββ| 9892/10682 [1:24:04<06:30, 2.02it/s]
|
1323 |
93%|ββββββββββ| 9893/10682 [1:24:05<06:29, 2.02it/s]
|
1324 |
93%|ββββββββββ| 9894/10682 [1:24:05<06:29, 2.02it/s]
|
1325 |
93%|ββββββββββ| 9895/10682 [1:24:06<06:28, 2.03it/s]
|
1326 |
93%|ββββββββββ| 9896/10682 [1:24:06<06:28, 2.02it/s]
|
1327 |
93%|ββββββββββ| 9897/10682 [1:24:07<06:27, 2.02it/s]
|
1328 |
93%|ββββββββββ| 9898/10682 [1:24:07<06:27, 2.02it/s]
|
1329 |
93%|ββββββββββ| 9899/10682 [1:24:08<06:26, 2.02it/s]
|
1330 |
93%|ββββββββββ| 9900/10682 [1:24:08<06:26, 2.02it/s]{'loss': 2.7483, 'grad_norm': 0.2770647406578064, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
1331 |
|
|
|
1332 |
93%|ββββββββββ| 9900/10682 [1:24:08<06:26, 2.02it/s]
|
1333 |
93%|ββββββββββ| 9901/10682 [1:24:09<06:27, 2.02it/s]
|
1334 |
93%|ββββββββββ| 9902/10682 [1:24:09<06:26, 2.02it/s]
|
1335 |
93%|ββββββββββ| 9903/10682 [1:24:10<06:25, 2.02it/s]
|
1336 |
93%|ββββββββββ| 9904/10682 [1:24:10<06:25, 2.02it/s]
|
1337 |
93%|ββββββββββ| 9905/10682 [1:24:11<06:24, 2.02it/s]
|
1338 |
93%|ββββββββββ| 9906/10682 [1:24:11<06:23, 2.02it/s]
|
1339 |
93%|ββββββββββ| 9907/10682 [1:24:12<06:23, 2.02it/s]
|
1340 |
93%|ββββββββββ| 9908/10682 [1:24:12<06:23, 2.02it/s]
|
1341 |
93%|ββββββββββ| 9909/10682 [1:24:13<06:22, 2.02it/s]
|
1342 |
93%|ββββββββββ| 9910/10682 [1:24:13<06:21, 2.02it/s]
|
1343 |
93%|ββββββββββ| 9911/10682 [1:24:13<06:21, 2.02it/s]
|
1344 |
93%|ββββββββββ| 9912/10682 [1:24:14<06:20, 2.02it/s]
|
1345 |
93%|ββββββββββ| 9913/10682 [1:24:14<06:20, 2.02it/s]
|
1346 |
93%|ββββββββββ| 9914/10682 [1:24:15<06:19, 2.02it/s]
|
1347 |
93%|ββββββββββ| 9915/10682 [1:24:15<06:19, 2.02it/s]
|
1348 |
93%|ββββββββββ| 9916/10682 [1:24:16<06:18, 2.02it/s]
|
1349 |
93%|ββββββββββ| 9917/10682 [1:24:16<06:18, 2.02it/s]
|
1350 |
93%|ββββββββββ| 9918/10682 [1:24:17<06:17, 2.02it/s]
|
1351 |
93%|ββββββββββ| 9919/10682 [1:24:17<06:17, 2.02it/s]
|
1352 |
93%|ββββββββββ| 9920/10682 [1:24:18<06:16, 2.02it/s]
|
1353 |
93%|ββββββββββ| 9921/10682 [1:24:18<06:16, 2.02it/s]
|
1354 |
93%|ββββββββββ| 9922/10682 [1:24:19<06:15, 2.02it/s]
|
1355 |
93%|ββββββββββ| 9923/10682 [1:24:19<06:15, 2.02it/s]
|
1356 |
93%|ββββββββββ| 9924/10682 [1:24:20<06:15, 2.02it/s]
|
1357 |
93%|ββββββββββ| 9925/10682 [1:24:20<06:23, 1.98it/s]
|
1358 |
|
|
|
1359 |
93%|ββββββββββ| 9925/10682 [1:24:20<06:23, 1.98it/s]
|
1360 |
93%|ββββββββββ| 9926/10682 [1:24:32<49:46, 3.95s/it]
|
1361 |
93%|ββββββββββ| 9927/10682 [1:24:33<36:40, 2.91s/it]
|
1362 |
93%|ββββββββββ| 9928/10682 [1:24:33<27:31, 2.19s/it]
|
1363 |
93%|ββββββββββ| 9929/10682 [1:24:34<21:07, 1.68s/it]
|
1364 |
93%|ββββββββββ| 9930/10682 [1:24:34<16:38, 1.33s/it]
|
1365 |
93%|ββββββββββ| 9931/10682 [1:24:35<13:29, 1.08s/it]
|
1366 |
93%|ββββββββββ| 9932/10682 [1:24:35<11:17, 1.11it/s]
|
1367 |
93%|ββββββββββ| 9933/10682 [1:24:36<09:47, 1.28it/s]
|
1368 |
93%|ββββββββββ| 9934/10682 [1:24:36<08:42, 1.43it/s]
|
1369 |
93%|ββββββββββ| 9935/10682 [1:24:37<07:55, 1.57it/s]
|
1370 |
93%|ββββββββββ| 9936/10682 [1:24:37<07:22, 1.68it/s]
|
1371 |
93%|ββββββββββ| 9937/10682 [1:24:38<06:59, 1.78it/s]
|
1372 |
93%|ββββββββββ| 9938/10682 [1:24:38<06:43, 1.84it/s]
|
1373 |
93%|ββββββββββ| 9939/10682 [1:24:39<06:32, 1.89it/s]
|
1374 |
93%|ββββββββββ| 9940/10682 [1:24:39<06:24, 1.93it/s]
|
1375 |
93%|ββββββββββ| 9941/10682 [1:24:40<06:19, 1.95it/s]
|
1376 |
93%|ββββββββββ| 9942/10682 [1:24:40<06:15, 1.97it/s]
|
1377 |
93%|ββββββββββ| 9943/10682 [1:24:41<06:12, 1.99it/s]
|
1378 |
93%|ββββββββββ| 9944/10682 [1:24:41<06:10, 1.99it/s]
|
1379 |
93%|ββββββββββ| 9945/10682 [1:24:42<06:08, 2.00it/s]
|
1380 |
93%|ββββββββββ| 9946/10682 [1:24:42<06:06, 2.01it/s]
|
1381 |
93%|ββββββββββ| 9947/10682 [1:24:43<06:05, 2.01it/s]
|
1382 |
93%|ββββββββββ| 9948/10682 [1:24:43<06:04, 2.01it/s]
|
1383 |
93%|ββββββββββ| 9949/10682 [1:24:44<06:04, 2.01it/s]
|
|
|
452 |
|
453 |
85%|βββββββββ | 9100/10682 [1:17:21<13:01, 2.02it/s]
|
454 |
85%|βββββββββ | 9101/10682 [1:17:21<13:02, 2.02it/s]
|
455 |
85%|βββββββββ | 9102/10682 [1:17:22<13:01, 2.02it/s]
|
456 |
85%|βββββββββ | 9103/10682 [1:17:22<13:00, 2.02it/s]
|
457 |
85%|βββββββββ | 9104/10682 [1:17:23<13:00, 2.02it/s]
|
458 |
85%|βββββββββ | 9105/10682 [1:17:23<13:00, 2.02it/s]
|
459 |
85%|βββββββββ | 9106/10682 [1:17:24<13:00, 2.02it/s]
|
460 |
85%|βββββββββ | 9107/10682 [1:17:24<12:59, 2.02it/s]
|
461 |
85%|βββββββββ | 9108/10682 [1:17:25<12:58, 2.02it/s]
|
462 |
85%|βββββββββ | 9109/10682 [1:17:25<12:57, 2.02it/s]
|
463 |
85%|βββββββββ | 9110/10682 [1:17:26<12:57, 2.02it/s]
|
464 |
85%|βββββββββ | 9111/10682 [1:17:26<12:56, 2.02it/s]
|
465 |
85%|βββββββββ | 9112/10682 [1:17:27<12:56, 2.02it/s]
|
466 |
85%|βββββββββ | 9113/10682 [1:17:27<12:56, 2.02it/s]
|
467 |
85%|βββββββββ | 9114/10682 [1:17:28<12:54, 2.02it/s]
|
468 |
85%|βββββββββ | 9115/10682 [1:17:28<12:54, 2.02it/s]
|
469 |
85%|βββββββββ | 9116/10682 [1:17:29<12:54, 2.02it/s]
|
470 |
85%|βββββββββ | 9117/10682 [1:17:29<12:54, 2.02it/s]
|
471 |
85%|βββββββββ | 9118/10682 [1:17:30<12:53, 2.02it/s]
|
472 |
85%|βββββββββ | 9119/10682 [1:17:30<12:52, 2.02it/s]
|
473 |
85%|βββββββββ | 9120/10682 [1:17:31<12:52, 2.02it/s]
|
474 |
85%|βββββββββ | 9121/10682 [1:17:31<12:51, 2.02it/s]
|
475 |
85%|βββββββββ | 9122/10682 [1:17:32<12:51, 2.02it/s]
|
476 |
85%|βββββββββ | 9123/10682 [1:17:32<12:51, 2.02it/s]
|
477 |
85%|βββββββββ | 9124/10682 [1:17:33<12:51, 2.02it/s]
|
478 |
85%|βββββββββ | 9125/10682 [1:17:33<12:50, 2.02it/s]{'loss': 2.7976, 'grad_norm': 0.275244802236557, 'learning_rate': 6.334441157888504e-05, 'epoch': 11.95}
|
479 |
|
480 |
|
481 |
85%|βββββββββ | 9125/10682 [1:17:33<12:50, 2.02it/s]
|
482 |
85%|βββββββββ | 9126/10682 [1:17:34<12:52, 2.01it/s]
|
483 |
85%|βββββββββ | 9127/10682 [1:17:34<12:51, 2.02it/s]
|
484 |
85%|βββββββββ | 9128/10682 [1:17:35<12:49, 2.02it/s]
|
485 |
85%|βββββββββ | 9129/10682 [1:17:35<12:49, 2.02it/s]
|
486 |
85%|βββββββββ | 9130/10682 [1:17:36<12:47, 2.02it/s]
|
487 |
85%|βββββββββ | 9131/10682 [1:17:36<12:46, 2.02it/s]
|
488 |
85%|βββββββββ | 9132/10682 [1:17:37<12:46, 2.02it/s]
|
489 |
85%|βββββββββ | 9133/10682 [1:17:37<12:45, 2.02it/s]
|
490 |
86%|βββββββββ | 9134/10682 [1:17:38<12:45, 2.02it/s]
|
491 |
86%|βββββββββ | 9135/10682 [1:17:38<12:44, 2.02it/s]
|
492 |
86%|βββββββββ | 9136/10682 [1:17:39<12:44, 2.02it/s]
|
493 |
86%|βββββββββ | 9137/10682 [1:17:39<12:44, 2.02it/s]
|
494 |
86%|βββββββββ | 9138/10682 [1:17:40<12:43, 2.02it/s]
|
495 |
86%|βββββββββ | 9139/10682 [1:17:40<12:42, 2.02it/s]
|
496 |
86%|βββββββββ | 9140/10682 [1:17:41<12:42, 2.02it/s]
|
497 |
86%|βββββββββ | 9141/10682 [1:17:41<12:41, 2.02it/s]
|
498 |
86%|βββββββββ | 9142/10682 [1:17:42<12:41, 2.02it/s]
|
499 |
86%|βββββββββ | 9143/10682 [1:17:42<12:41, 2.02it/s]
|
500 |
86%|βββββββββ | 9144/10682 [1:17:42<12:41, 2.02it/s]
|
501 |
86%|βββββββββ | 9145/10682 [1:17:43<12:40, 2.02it/s]
|
502 |
86%|βββββββββ | 9146/10682 [1:17:43<12:39, 2.02it/s]
|
503 |
86%|βββββββββ | 9147/10682 [1:17:44<12:38, 2.02it/s]
|
504 |
86%|βββββββββ | 9148/10682 [1:17:44<12:38, 2.02it/s]
|
505 |
86%|βββββββββ | 9149/10682 [1:17:45<12:36, 2.03it/s]
|
506 |
86%|βββββββββ | 9150/10682 [1:17:45<12:36, 2.02it/s]
|
507 |
|
508 |
|
509 |
86%|βββββββββ | 9150/10682 [1:17:45<12:36, 2.02it/s]
|
510 |
86%|βββββββββ | 9151/10682 [1:17:46<12:36, 2.02it/s]
|
511 |
86%|βββββββββ | 9152/10682 [1:17:46<12:36, 2.02it/s]
|
512 |
86%|βββββββββ | 9153/10682 [1:17:47<12:35, 2.02it/s]
|
513 |
86%|βββββββββ | 9154/10682 [1:17:47<12:34, 2.02it/s]
|
514 |
86%|βββββββββ | 9155/10682 [1:17:48<12:34, 2.02it/s]
|
515 |
86%|βββββββββ | 9156/10682 [1:17:48<12:34, 2.02it/s]
|
516 |
86%|βββββββββ | 9157/10682 [1:17:49<12:34, 2.02it/s]
|
517 |
86%|βββββββββ | 9158/10682 [1:17:49<12:34, 2.02it/s]
|
518 |
86%|βββββββββ | 9159/10682 [1:17:50<12:33, 2.02it/s]
|
519 |
86%|βββββββββ | 9160/10682 [1:17:50<12:32, 2.02it/s]
|
520 |
86%|βββββββββ | 9161/10682 [1:17:51<12:32, 2.02it/s]
|
521 |
86%|βββββββββ | 9162/10682 [1:17:51<12:23, 2.04it/s]
|
522 |
86%|βββββββββ | 9163/10682 [1:18:03<1:39:32, 3.93s/it]
|
523 |
86%|βββββββββ | 9164/10682 [1:18:04<1:13:27, 2.90s/it]
|
524 |
86%|βββββββββ | 9165/10682 [1:18:04<55:07, 2.18s/it]
|
525 |
86%|βββββββββ | 9166/10682 [1:18:05<42:23, 1.68s/it]
|
526 |
86%|βββββββββ | 9167/10682 [1:18:05<33:24, 1.32s/it]
|
527 |
86%|βββββββββ | 9168/10682 [1:18:06<27:07, 1.07s/it]
|
528 |
86%|βββββββββ | 9169/10682 [1:18:06<22:43, 1.11it/s]
|
529 |
86%|βββββββββ | 9170/10682 [1:18:07<19:41, 1.28it/s]
|
530 |
86%|βββββββββ | 9171/10682 [1:18:07<17:30, 1.44it/s]
|
531 |
86%|βββββββββ | 9172/10682 [1:18:08<15:58, 1.57it/s]
|
532 |
86%|βββββββββ | 9173/10682 [1:18:08<14:55, 1.69it/s]
|
533 |
86%|βββββββββ | 9174/10682 [1:18:09<14:09, 1.78it/s]
|
534 |
86%|βββββββββ | 9175/10682 [1:18:09<13:39, 1.84it/s]
|
535 |
|
|
|
536 |
86%|βββββββββ | 9175/10682 [1:18:09<13:39, 1.84it/s]
|
537 |
86%|βββββββββ | 9176/10682 [1:18:10<13:16, 1.89it/s]
|
538 |
86%|βββββββββ | 9177/10682 [1:18:10<13:00, 1.93it/s]
|
539 |
86%|βββββββββ | 9178/10682 [1:18:11<12:49, 1.95it/s]
|
540 |
86%|βββββββββ | 9179/10682 [1:18:11<12:41, 1.97it/s]
|
541 |
86%|βββββββββ | 9180/10682 [1:18:12<12:35, 1.99it/s]
|
542 |
86%|βββββββββ | 9181/10682 [1:18:12<12:32, 1.99it/s]
|
543 |
86%|βββββββββ | 9182/10682 [1:18:13<12:28, 2.00it/s]
|
544 |
86%|βββββββββ | 9183/10682 [1:18:13<12:26, 2.01it/s]
|
545 |
86%|βββββββββ | 9184/10682 [1:18:14<12:24, 2.01it/s]
|
546 |
86%|βββββββββ | 9185/10682 [1:18:14<12:23, 2.01it/s]
|
547 |
86%|βββββββββ | 9186/10682 [1:18:15<12:21, 2.02it/s]
|
548 |
+
|
549 |
86%|βββββββββ | 9175/10682 [1:18:09<13:39, 1.84it/s]
|
550 |
86%|βββββββββ | 9176/10682 [1:18:10<13:16, 1.89it/s]
|
551 |
86%|βββββββββ | 9177/10682 [1:18:10<13:00, 1.93it/s]
|
552 |
86%|βββββββββ | 9178/10682 [1:18:11<12:49, 1.95it/s]
|
553 |
86%|βββββββββ | 9179/10682 [1:18:11<12:41, 1.97it/s]
|
554 |
86%|βββββββββ | 9180/10682 [1:18:12<12:35, 1.99it/s]
|
555 |
86%|βββββββββ | 9181/10682 [1:18:12<12:32, 1.99it/s]
|
556 |
86%|βββββββββ | 9182/10682 [1:18:13<12:28, 2.00it/s]
|
557 |
86%|βββββββββ | 9183/10682 [1:18:13<12:26, 2.01it/s]
|
558 |
86%|βββββββββ | 9184/10682 [1:18:14<12:24, 2.01it/s]
|
559 |
86%|βββββββββ | 9185/10682 [1:18:14<12:23, 2.01it/s]
|
560 |
86%|βββββββββ | 9186/10682 [1:18:15<12:21, 2.02it/s]
|
561 |
86%|βββββββββ | 9187/10682 [1:18:15<12:25, 2.01it/s]
|
562 |
86%|βββββββββ | 9188/10682 [1:18:16<12:23, 2.01it/s]
|
563 |
86%|βββββββββ | 9189/10682 [1:18:16<12:21, 2.01it/s]
|
564 |
86%|βββββββββ | 9190/10682 [1:18:17<12:20, 2.02it/s]
|
565 |
86%|βββββββββ | 9191/10682 [1:18:17<12:18, 2.02it/s]
|
566 |
86%|βββββββββ | 9192/10682 [1:18:18<12:18, 2.02it/s]
|
567 |
86%|βββββββββ | 9193/10682 [1:18:18<12:16, 2.02it/s]
|
568 |
86%|βββββββββ | 9194/10682 [1:18:19<12:15, 2.02it/s]
|
569 |
86%|βββββββββ | 9195/10682 [1:18:19<12:14, 2.02it/s]
|
570 |
86%|βββββββββ | 9196/10682 [1:18:20<12:14, 2.02it/s]
|
571 |
86%|βββββββββ | 9197/10682 [1:18:20<12:13, 2.02it/s]
|
572 |
86%|βββββββββ | 9198/10682 [1:18:21<12:12, 2.02it/s]
|
573 |
86%|βββββββββ | 9199/10682 [1:18:21<12:12, 2.02it/s]
|
574 |
86%|βββββββββ | 9200/10682 [1:18:22<12:12, 2.02it/s]{'loss': 2.7368, 'grad_norm': 0.28075122833251953, 'learning_rate': 5.7505864256519716e-05, 'epoch': 12.05}
|
575 |
|
576 |
+
|
577 |
86%|βββββββββ | 9200/10682 [1:18:22<12:12, 2.02it/s]
|
578 |
86%|βββββββββ | 9201/10682 [1:18:22<12:12, 2.02it/s]
|
579 |
86%|βββββββββ | 9202/10682 [1:18:23<12:11, 2.02it/s]
|
580 |
86%|βββββββββ | 9203/10682 [1:18:23<12:10, 2.02it/s]
|
581 |
86%|βββββββββ | 9204/10682 [1:18:24<12:10, 2.02it/s]
|
582 |
86%|βββββββββ | 9205/10682 [1:18:24<12:09, 2.02it/s]
|
583 |
86%|βββββββββ | 9206/10682 [1:18:25<12:09, 2.02it/s]
|
584 |
86%|βββββββββ | 9207/10682 [1:18:25<12:08, 2.02it/s]
|
585 |
86%|βββββββββ | 9208/10682 [1:18:26<12:07, 2.03it/s]
|
586 |
86%|βββββββββ | 9209/10682 [1:18:26<12:07, 2.02it/s]
|
587 |
86%|βββββββββ | 9210/10682 [1:18:27<12:06, 2.03it/s]
|
588 |
86%|βββββββββ | 9211/10682 [1:18:27<12:06, 2.02it/s]
|
589 |
86%|βββββββββ | 9212/10682 [1:18:28<12:05, 2.02it/s]
|
590 |
86%|βββββββββ | 9213/10682 [1:18:28<12:05, 2.02it/s]
|
591 |
86%|βββββββββ | 9214/10682 [1:18:29<12:04, 2.03it/s]
|
592 |
86%|βββββββββ | 9215/10682 [1:18:29<12:05, 2.02it/s]
|
593 |
86%|βββββββββ | 9216/10682 [1:18:30<12:04, 2.02it/s]
|
594 |
86%|βββββββββ | 9217/10682 [1:18:30<12:04, 2.02it/s]
|
595 |
86%|βββββββββ | 9218/10682 [1:18:31<12:03, 2.02it/s]
|
596 |
86%|βββββββββ | 9219/10682 [1:18:31<12:03, 2.02it/s]
|
597 |
86%|βββββββββ | 9220/10682 [1:18:32<12:03, 2.02it/s]
|
598 |
86%|βββββββββ | 9221/10682 [1:18:32<12:03, 2.02it/s]
|
599 |
86%|βββββββββ | 9222/10682 [1:18:33<12:01, 2.02it/s]
|
600 |
86%|βββββββββ | 9223/10682 [1:18:33<12:01, 2.02it/s]
|
601 |
86%|βββββββββ | 9224/10682 [1:18:34<12:00, 2.02it/s]
|
602 |
86%|βββββββββ | 9225/10682 [1:18:34<11:59, 2.02it/s]{'loss': 2.7433, 'grad_norm': 0.2763462960720062, 'learning_rate': 5.561858464291258e-05, 'epoch': 12.08}
|
603 |
+
|
604 |
|
605 |
86%|βββββββββ | 9225/10682 [1:18:34<11:59, 2.02it/s]
|
606 |
86%|βββββββββ | 9226/10682 [1:18:35<11:59, 2.02it/s]
|
607 |
86%|βββββββββ | 9227/10682 [1:18:35<12:00, 2.02it/s]
|
608 |
86%|βββββββββ | 9228/10682 [1:18:36<11:58, 2.02it/s]
|
609 |
86%|βββββββββ | 9229/10682 [1:18:36<11:58, 2.02it/s]
|
610 |
86%|βββββββββ | 9230/10682 [1:18:37<11:57, 2.02it/s]
|
611 |
86%|βββββββββ | 9231/10682 [1:18:37<11:57, 2.02it/s]
|
612 |
86%|βββββββββ | 9232/10682 [1:18:37<11:56, 2.02it/s]
|
613 |
86%|βββββββββ | 9233/10682 [1:18:38<11:56, 2.02it/s]
|
614 |
86%|βββββββββ | 9234/10682 [1:18:38<11:55, 2.02it/s]
|
615 |
86%|βββββββββ | 9235/10682 [1:18:39<11:55, 2.02it/s]
|
616 |
86%|βββββββββ | 9236/10682 [1:18:39<11:54, 2.02it/s]
|
617 |
86%|βββββββββ | 9237/10682 [1:18:40<11:54, 2.02it/s]
|
618 |
86%|βββββββββ | 9238/10682 [1:18:40<11:54, 2.02it/s]
|
619 |
86%|βββββββββ | 9239/10682 [1:18:41<11:53, 2.02it/s]
|
620 |
87%|βββββββββ | 9240/10682 [1:18:41<11:52, 2.02it/s]
|
621 |
87%|βββββββββ | 9241/10682 [1:18:42<11:53, 2.02it/s]
|
622 |
87%|βββββββββ | 9242/10682 [1:18:42<11:52, 2.02it/s]
|
623 |
87%|βββββββββ | 9243/10682 [1:18:43<11:51, 2.02it/s]
|
624 |
87%|βββββββββ | 9244/10682 [1:18:43<11:51, 2.02it/s]
|
625 |
87%|βββββββββ | 9245/10682 [1:18:44<11:50, 2.02it/s]
|
626 |
87%|βββββββββ | 9246/10682 [1:18:44<11:50, 2.02it/s]
|
627 |
87%|βββββββββ | 9247/10682 [1:18:45<11:49, 2.02it/s]
|
628 |
87%|βββββββββ | 9248/10682 [1:18:45<11:49, 2.02it/s]
|
629 |
87%|βββββββββ | 9249/10682 [1:18:46<11:47, 2.02it/s]
|
630 |
87%|βββββββββ | 9250/10682 [1:18:46<11:47, 2.02it/s]
|
631 |
|
632 |
+
|
633 |
87%|βββββββββ | 9250/10682 [1:18:46<11:47, 2.02it/s]
|
634 |
87%|βββββββββ | 9251/10682 [1:18:47<11:47, 2.02it/s]
|
635 |
87%|βββββββββ | 9252/10682 [1:18:47<11:46, 2.02it/s]
|
636 |
87%|βββββββββ | 9253/10682 [1:18:48<11:45, 2.03it/s]
|
637 |
87%|βββββββββ | 9254/10682 [1:18:48<11:45, 2.02it/s]
|
638 |
87%|βββββββββ | 9255/10682 [1:18:49<11:44, 2.02it/s]
|
639 |
87%|βββββββββ | 9256/10682 [1:18:49<11:45, 2.02it/s]
|
640 |
87%|βββββββββ | 9257/10682 [1:18:50<11:44, 2.02it/s]
|
641 |
87%|βββββββββ | 9258/10682 [1:18:50<11:44, 2.02it/s]
|
642 |
87%|βββββββββ | 9259/10682 [1:18:51<11:43, 2.02it/s]
|
643 |
87%|βββββββββ | 9260/10682 [1:18:51<11:43, 2.02it/s]
|
644 |
87%|βββββββββ | 9261/10682 [1:18:52<11:42, 2.02it/s]
|
645 |
87%|βββββββββ | 9262/10682 [1:18:52<11:41, 2.02it/s]
|
646 |
87%|βββββββββ | 9263/10682 [1:18:53<11:41, 2.02it/s]
|
647 |
87%|βββββββββ | 9264/10682 [1:18:53<11:40, 2.02it/s]
|
648 |
87%|βββββββββ | 9265/10682 [1:18:54<11:40, 2.02it/s]
|
649 |
87%|βββββββββ | 9266/10682 [1:18:54<11:39, 2.02it/s]
|
650 |
87%|βββββββββ | 9267/10682 [1:18:55<11:39, 2.02it/s]
|
651 |
87%|βββββββββ | 9268/10682 [1:18:55<11:38, 2.02it/s]
|
652 |
87%|βββββββββ | 9269/10682 [1:18:56<11:38, 2.02it/s]
|
653 |
87%|βββββββββ | 9270/10682 [1:18:56<11:37, 2.02it/s]
|
654 |
87%|βββββββββ | 9271/10682 [1:18:57<11:37, 2.02it/s]
|
655 |
87%|βββββββββ | 9272/10682 [1:18:57<11:36, 2.02it/s]
|
656 |
87%|βββββββββ | 9273/10682 [1:18:58<11:36, 2.02it/s]
|
657 |
87%|βββββββββ | 9274/10682 [1:18:58<11:35, 2.02it/s]
|
658 |
87%|βββββββββ | 9275/10682 [1:18:59<11:35, 2.02it/s]
|
659 |
|
660 |
+
|
661 |
87%|βββββββββ | 9275/10682 [1:18:59<11:35, 2.02it/s]
|
662 |
87%|βββββββββ | 9276/10682 [1:18:59<11:35, 2.02it/s]
|
663 |
87%|βββββββββ | 9277/10682 [1:19:00<11:35, 2.02it/s]
|
664 |
87%|βββββββββ | 9278/10682 [1:19:00<11:34, 2.02it/s]
|
665 |
87%|βββββββββ | 9279/10682 [1:19:01<11:34, 2.02it/s]
|
666 |
87%|βββββββββ | 9280/10682 [1:19:01<11:32, 2.02it/s]
|
667 |
87%|βββββββββ | 9281/10682 [1:19:02<11:32, 2.02it/s]
|
668 |
87%|βββββββββ | 9282/10682 [1:19:02<11:31, 2.02it/s]
|
669 |
87%|βββββββββ | 9283/10682 [1:19:03<11:31, 2.02it/s]
|
670 |
87%|βββββββββ | 9284/10682 [1:19:03<11:30, 2.02it/s]
|
671 |
87%|βββββββββ | 9285/10682 [1:19:04<11:31, 2.02it/s]
|
672 |
87%|βββββββββ | 9286/10682 [1:19:04<11:30, 2.02it/s]
|
673 |
87%|βββββββββ | 9287/10682 [1:19:05<11:30, 2.02it/s]
|
674 |
87%|βββββββββ | 9288/10682 [1:19:05<11:29, 2.02it/s]
|
675 |
87%|βββββββββ | 9289/10682 [1:19:06<11:29, 2.02it/s]
|
676 |
87%|βββββββββ | 9290/10682 [1:19:06<11:28, 2.02it/s]
|
677 |
87%|βββββββββ | 9291/10682 [1:19:07<11:28, 2.02it/s]
|
678 |
87%|βββββββββ | 9292/10682 [1:19:07<11:28, 2.02it/s]
|
679 |
87%|βββββββββ | 9293/10682 [1:19:08<11:27, 2.02it/s]
|
680 |
87%|βββββββββ | 9294/10682 [1:19:08<11:27, 2.02it/s]
|
681 |
87%|βββββββββ | 9295/10682 [1:19:09<11:26, 2.02it/s]
|
682 |
87%|βββββββββ | 9296/10682 [1:19:09<11:25, 2.02it/s]
|
683 |
87%|βββββββββ | 9297/10682 [1:19:10<11:25, 2.02it/s]
|
684 |
87%|βββββββββ | 9298/10682 [1:19:10<11:24, 2.02it/s]
|
685 |
87%|βββββββββ | 9299/10682 [1:19:11<11:24, 2.02it/s]
|
686 |
87%|βββββββββ | 9300/10682 [1:19:11<11:24, 2.02it/s]{'loss': 2.7346, 'grad_norm': 0.27623340487480164, 'learning_rate': 5.0135217920839137e-05, 'epoch': 12.18}
|
687 |
+
|
688 |
|
689 |
87%|βββββββββ | 9300/10682 [1:19:11<11:24, 2.02it/s]
|
690 |
87%|βββββββββ | 9301/10682 [1:19:12<11:24, 2.02it/s]
|
691 |
87%|βββββββββ | 9302/10682 [1:19:12<11:22, 2.02it/s]
|
692 |
87%|βββββββββ | 9303/10682 [1:19:13<11:22, 2.02it/s]
|
693 |
87%|βββββββββ | 9304/10682 [1:19:13<11:21, 2.02it/s]
|
694 |
87%|βββββββββ | 9305/10682 [1:19:14<11:21, 2.02it/s]
|
695 |
87%|βββββββββ | 9306/10682 [1:19:14<11:19, 2.02it/s]
|
696 |
87%|βββββββββ | 9307/10682 [1:19:15<11:19, 2.02it/s]
|
697 |
87%|βββββββββ | 9308/10682 [1:19:15<11:18, 2.02it/s]
|
698 |
87%|βββββββββ | 9309/10682 [1:19:16<11:18, 2.02it/s]
|
699 |
87%|βββββββββ | 9310/10682 [1:19:16<11:17, 2.02it/s]
|
700 |
87%|βββββββββ | 9311/10682 [1:19:17<11:17, 2.02it/s]
|
701 |
87%|βββββββββ | 9312/10682 [1:19:17<11:16, 2.03it/s]
|
702 |
87%|βββββββββ | 9313/10682 [1:19:18<11:16, 2.02it/s]
|
703 |
87%|βββββββββ | 9314/10682 [1:19:18<11:15, 2.02it/s]
|
704 |
87%|βββββββββ | 9315/10682 [1:19:19<11:15, 2.02it/s]
|
705 |
87%|βββββββββ | 9316/10682 [1:19:19<11:14, 2.02it/s]
|
706 |
87%|βββββββββ | 9317/10682 [1:19:20<11:14, 2.02it/s]
|
707 |
87%|βββββββββ | 9318/10682 [1:19:20<11:13, 2.02it/s]
|
708 |
87%|βββββββββ | 9319/10682 [1:19:21<11:15, 2.02it/s]
|
709 |
87%|βββββββββ | 9320/10682 [1:19:21<11:14, 2.02it/s]
|
710 |
87%|βββββββββ | 9321/10682 [1:19:22<11:13, 2.02it/s]
|
711 |
87%|βββββββββ | 9322/10682 [1:19:22<11:12, 2.02it/s]
|
712 |
87%|βββββββββ | 9323/10682 [1:19:22<11:11, 2.02it/s]
|
713 |
87%|βββββββββ | 9324/10682 [1:19:23<11:11, 2.02it/s]
|
714 |
87%|βββββββββ | 9325/10682 [1:19:23<11:11, 2.02it/s]{'loss': 2.7366, 'grad_norm': 0.2855311632156372, 'learning_rate': 4.836732641133895e-05, 'epoch': 12.21}
|
715 |
|
716 |
+
|
717 |
87%|βββββββββ | 9325/10682 [1:19:23<11:11, 2.02it/s]
|
718 |
87%|βββββββββ | 9326/10682 [1:19:24<11:11, 2.02it/s]
|
719 |
87%|βββββββββ | 9327/10682 [1:19:24<11:10, 2.02it/s]
|
720 |
87%|βββββββββ | 9328/10682 [1:19:25<11:09, 2.02it/s]
|
721 |
87%|βββββββββ | 9329/10682 [1:19:25<11:08, 2.02it/s]
|
722 |
87%|βββββββββ | 9330/10682 [1:19:26<11:08, 2.02it/s]
|
723 |
87%|βββββββββ | 9331/10682 [1:19:26<11:08, 2.02it/s]
|
724 |
87%|βββββββββ | 9332/10682 [1:19:27<11:07, 2.02it/s]
|
725 |
87%|βββββββββ | 9333/10682 [1:19:27<11:06, 2.02it/s]
|
726 |
87%|βββββββββ | 9334/10682 [1:19:28<11:07, 2.02it/s]
|
727 |
87%|βββββββββ | 9335/10682 [1:19:28<11:06, 2.02it/s]
|
728 |
87%|βββββββββ | 9336/10682 [1:19:29<11:05, 2.02it/s]
|
729 |
87%|βββββββββ | 9337/10682 [1:19:29<11:05, 2.02it/s]
|
730 |
87%|βββοΏ½οΏ½βββββ | 9338/10682 [1:19:30<11:04, 2.02it/s]
|
731 |
87%|βββββββββ | 9339/10682 [1:19:30<11:04, 2.02it/s]
|
732 |
87%|βββββββββ | 9340/10682 [1:19:31<11:03, 2.02it/s]
|
733 |
87%|βββββββββ | 9341/10682 [1:19:31<11:02, 2.02it/s]
|
734 |
87%|βββββββββ | 9342/10682 [1:19:32<11:02, 2.02it/s]
|
735 |
87%|βββββββββ | 9343/10682 [1:19:32<11:01, 2.02it/s]
|
736 |
87%|βββββββββ | 9344/10682 [1:19:33<11:01, 2.02it/s]
|
737 |
87%|βββββββββ | 9345/10682 [1:19:33<11:00, 2.02it/s]
|
738 |
87%|βββββββββ | 9346/10682 [1:19:34<11:00, 2.02it/s]
|
739 |
88%|βββββββββ | 9347/10682 [1:19:34<11:00, 2.02it/s]
|
740 |
88%|βββββββββ | 9348/10682 [1:19:35<10:59, 2.02it/s]
|
741 |
88%|βββββββββ | 9349/10682 [1:19:35<10:59, 2.02it/s]
|
742 |
88%|βββββββββ | 9350/10682 [1:19:36<10:58, 2.02it/s]{'loss': 2.7481, 'grad_norm': 0.28073567152023315, 'learning_rate': 4.662958195146971e-05, 'epoch': 12.25}
|
743 |
+
|
744 |
|
745 |
88%|βββββββββ | 9350/10682 [1:19:36<10:58, 2.02it/s]
|
746 |
88%|βββββββββ | 9351/10682 [1:19:36<10:58, 2.02it/s]
|
747 |
88%|βββββββββ | 9352/10682 [1:19:37<10:57, 2.02it/s]
|
748 |
88%|βββββββββ | 9353/10682 [1:19:37<10:56, 2.02it/s]
|
749 |
88%|βββββββββ | 9354/10682 [1:19:38<10:56, 2.02it/s]
|
750 |
88%|βββββββββ | 9355/10682 [1:19:38<10:55, 2.02it/s]
|
751 |
88%|βββββββββ | 9356/10682 [1:19:39<10:55, 2.02it/s]
|
752 |
88%|βββββββββ | 9357/10682 [1:19:39<10:55, 2.02it/s]
|
753 |
88%|βββββββββ | 9358/10682 [1:19:40<10:54, 2.02it/s]
|
754 |
88%|βββββββββ | 9359/10682 [1:19:40<10:54, 2.02it/s]
|
755 |
88%|βββββββββ | 9360/10682 [1:19:41<10:54, 2.02it/s]
|
756 |
88%|βββββββββ | 9361/10682 [1:19:41<10:54, 2.02it/s]
|
757 |
88%|βββββββββ | 9362/10682 [1:19:42<10:53, 2.02it/s]
|
758 |
88%|βββββββββ | 9363/10682 [1:19:42<10:53, 2.02it/s]
|
759 |
88%|βββββββββ | 9364/10682 [1:19:43<10:52, 2.02it/s]
|
760 |
88%|βββββββββ | 9365/10682 [1:19:43<10:51, 2.02it/s]
|
761 |
88%|βββββββββ | 9366/10682 [1:19:44<10:50, 2.02it/s]
|
762 |
88%|βββββββββ | 9367/10682 [1:19:44<10:50, 2.02it/s]
|
763 |
88%|βββββββββ | 9368/10682 [1:19:45<10:49, 2.02it/s]
|
764 |
88%|βββββββββ | 9369/10682 [1:19:45<10:49, 2.02it/s]
|
765 |
88%|βββββββββ | 9370/10682 [1:19:46<10:48, 2.02it/s]
|
766 |
88%|βββββββββ | 9371/10682 [1:19:46<10:48, 2.02it/s]
|
767 |
88%|βββββββββ | 9372/10682 [1:19:47<10:47, 2.02it/s]
|
768 |
88%|βββββββββ | 9373/10682 [1:19:47<10:47, 2.02it/s]
|
769 |
88%|βββββββββ | 9374/10682 [1:19:48<10:46, 2.02it/s]
|
770 |
88%|βββββββββ | 9375/10682 [1:19:48<10:45, 2.03it/s]
|
771 |
|
772 |
+
|
773 |
88%|βββββββββ | 9375/10682 [1:19:48<10:45, 2.03it/s]
|
774 |
88%|βββββββββ | 9376/10682 [1:19:49<10:45, 2.02it/s]
|
775 |
88%|βββββββββ | 9377/10682 [1:19:49<10:44, 2.02it/s]
|
776 |
88%|βββββββββ | 9378/10682 [1:19:50<10:44, 2.02it/s]
|
777 |
88%|βββββββββ | 9379/10682 [1:19:50<10:43, 2.02it/s]
|
778 |
88%|βββββββββ | 9380/10682 [1:19:51<10:44, 2.02it/s]
|
779 |
88%|βββββββββ | 9381/10682 [1:19:51<10:42, 2.02it/s]
|
780 |
88%|βββββββββ | 9382/10682 [1:19:52<10:42, 2.02it/s]
|
781 |
88%|βββββββββ | 9383/10682 [1:19:52<10:41, 2.03it/s]
|
782 |
88%|βββββββββ | 9384/10682 [1:19:53<10:41, 2.02it/s]
|
783 |
88%|βββββββββ | 9385/10682 [1:19:53<10:40, 2.03it/s]
|
784 |
88%|βββββββββ | 9386/10682 [1:19:54<10:40, 2.02it/s]
|
785 |
88%|βββββββββ | 9387/10682 [1:19:54<10:39, 2.02it/s]
|
786 |
88%|βββββββββ | 9388/10682 [1:19:55<10:39, 2.02it/s]
|
787 |
88%|βββββββββ | 9389/10682 [1:19:55<10:39, 2.02it/s]
|
788 |
88%|βββββββββ | 9390/10682 [1:19:56<10:37, 2.03it/s]
|
789 |
88%|βββββββββ | 9391/10682 [1:19:56<10:38, 2.02it/s]
|
790 |
88%|βββββββββ | 9392/10682 [1:19:57<10:37, 2.02it/s]
|
791 |
88%|βββββββββ | 9393/10682 [1:19:57<10:36, 2.02it/s]
|
792 |
88%|βββββββββ | 9394/10682 [1:19:58<10:35, 2.03it/s]
|
793 |
88%|βββββββββ | 9395/10682 [1:19:58<10:35, 2.02it/s]
|
794 |
88%|βββββββββ | 9396/10682 [1:19:59<10:35, 2.02it/s]
|
795 |
88%|βββββββββ | 9397/10682 [1:19:59<10:35, 2.02it/s]
|
796 |
88%|βββββββββ | 9398/10682 [1:20:00<10:34, 2.02it/s]
|
797 |
88%|βββββββββ | 9399/10682 [1:20:00<10:34, 2.02it/s]
|
798 |
88%|βββββββββ | 9400/10682 [1:20:01<10:33, 2.02it/s]{'loss': 2.7415, 'grad_norm': 0.27810999751091003, 'learning_rate': 4.3244996147050855e-05, 'epoch': 12.31}
|
799 |
|
800 |
+
|
801 |
88%|βββββββββ | 9400/10682 [1:20:01<10:33, 2.02it/s]
|
802 |
88%|βββββββββ | 9401/10682 [1:20:01<10:34, 2.02it/s]
|
803 |
88%|βββββββββ | 9402/10682 [1:20:02<10:33, 2.02it/s]
|
804 |
88%|βββββββββ | 9403/10682 [1:20:02<10:32, 2.02it/s]
|
805 |
88%|βββββββββ | 9404/10682 [1:20:03<10:32, 2.02it/s]
|
806 |
88%|βββββββββ | 9405/10682 [1:20:03<10:32, 2.02it/s]
|
807 |
88%|βββββββββ | 9406/10682 [1:20:04<10:31, 2.02it/s]
|
808 |
88%|βββββββββ | 9407/10682 [1:20:04<10:30, 2.02it/s]
|
809 |
88%|βββββββββ | 9408/10682 [1:20:05<10:30, 2.02it/s]
|
810 |
88%|βββββββββ | 9409/10682 [1:20:05<10:29, 2.02it/s]
|
811 |
88%|βββββββββ | 9410/10682 [1:20:06<10:29, 2.02it/s]
|
812 |
88%|βββββββββ | 9411/10682 [1:20:06<10:28, 2.02it/s]
|
813 |
88%|βββββββββ | 9412/10682 [1:20:07<10:28, 2.02it/s]
|
814 |
88%|βββββββββ | 9413/10682 [1:20:07<10:27, 2.02it/s]
|
815 |
88%|βββββββββ | 9414/10682 [1:20:07<10:27, 2.02it/s]
|
816 |
88%|βββββββββ | 9415/10682 [1:20:08<10:26, 2.02it/s]
|
817 |
88%|βββββββββ | 9416/10682 [1:20:08<10:26, 2.02it/s]
|
818 |
88%|βββββββββ | 9417/10682 [1:20:09<10:26, 2.02it/s]
|
819 |
88%|βββββββββ | 9418/10682 [1:20:09<10:25, 2.02it/s]
|
820 |
88%|βββββββββ | 9419/10682 [1:20:10<10:25, 2.02it/s]
|
821 |
88%|βββββββββ | 9420/10682 [1:20:10<10:24, 2.02it/s]
|
822 |
88%|βββββββββ | 9421/10682 [1:20:11<10:23, 2.02it/s]
|
823 |
88%|βββββββββ | 9422/10682 [1:20:11<10:23, 2.02it/s]
|
824 |
88%|βββββββββ | 9423/10682 [1:20:12<10:23, 2.02it/s]
|
825 |
88%|βββββββββ | 9424/10682 [1:20:12<10:22, 2.02it/s]
|
826 |
88%|βββββββββ | 9425/10682 [1:20:13<10:21, 2.02it/s]
|
827 |
|
828 |
+
|
829 |
88%|βββββββββ | 9425/10682 [1:20:13<10:21, 2.02it/s]
|
830 |
88%|βββββββββ | 9426/10682 [1:20:13<10:21, 2.02it/s]
|
831 |
88%|βββββββββ | 9427/10682 [1:20:14<10:21, 2.02it/s]
|
832 |
88%|βββββββββ | 9428/10682 [1:20:14<10:20, 2.02it/s]
|
833 |
88%|βββββββββ | 9429/10682 [1:20:15<10:19, 2.02it/s]
|
834 |
88%|βββββββββ | 9430/10682 [1:20:15<10:18, 2.02it/s]
|
835 |
88%|βββββββββ | 9431/10682 [1:20:16<10:18, 2.02it/s]
|
836 |
88%|βββββββββ | 9432/10682 [1:20:16<10:17, 2.02it/s]
|
837 |
88%|βββββββββ | 9433/10682 [1:20:17<10:17, 2.02it/s]
|
838 |
88%|βββββββββ | 9434/10682 [1:20:17<10:16, 2.02it/s]
|
839 |
88%|βββββββββ | 9435/10682 [1:20:18<10:16, 2.02it/s]
|
840 |
88%|βββββββββ | 9436/10682 [1:20:18<10:15, 2.02it/s]
|
841 |
88%|βββββββββ | 9437/10682 [1:20:19<10:15, 2.02it/s]
|
842 |
88%|βββββββββ | 9438/10682 [1:20:19<10:14, 2.02it/s]
|
843 |
88%|βββββββββ | 9439/10682 [1:20:20<10:14, 2.02it/s]
|
844 |
88%|βββββββββ | 9440/10682 [1:20:20<10:13, 2.02it/s]
|
845 |
88%|βββββββββ | 9441/10682 [1:20:21<10:13, 2.02it/s]
|
846 |
88%|βββββββββ | 9442/10682 [1:20:21<10:12, 2.02it/s]
|
847 |
88%|βββββββββ | 9443/10682 [1:20:22<10:12, 2.02it/s]
|
848 |
88%|βββββββββ | 9444/10682 [1:20:22<10:11, 2.02it/s]
|
849 |
88%|βββββββββ | 9445/10682 [1:20:23<10:12, 2.02it/s]
|
850 |
88%|βββββββββ | 9446/10682 [1:20:23<10:11, 2.02it/s]
|
851 |
88%|βββββββββ | 9447/10682 [1:20:24<10:10, 2.02it/s]
|
852 |
88%|βββββββββ | 9448/10682 [1:20:24<10:09, 2.02it/s]
|
853 |
88%|βββββββββ | 9449/10682 [1:20:25<10:08, 2.03it/s]
|
854 |
88%|βββββββββ | 9450/10682 [1:20:25<10:08, 2.02it/s]
|
855 |
{'loss': 2.7417, 'grad_norm': 0.2766771912574768, 'learning_rate': 3.998236419395806e-05, 'epoch': 12.38}
|
856 |
+
|
857 |
88%|βββββββββ | 9450/10682 [1:20:25<10:08, 2.02it/s]
|
858 |
88%|βββββββββ | 9451/10682 [1:20:26<10:08, 2.02it/s]
|
859 |
88%|βββββββββ | 9452/10682 [1:20:26<10:07, 2.02it/s]
|
860 |
88%|βββββββββ | 9453/10682 [1:20:27<10:07, 2.02it/s]
|
861 |
89%|βββββββββ | 9454/10682 [1:20:27<10:06, 2.02it/s]
|
862 |
89%|βββββββββ | 9455/10682 [1:20:28<10:06, 2.02it/s]
|
863 |
89%|βββββββββ | 9456/10682 [1:20:28<10:05, 2.02it/s]
|
864 |
89%|βββββββββ | 9457/10682 [1:20:29<10:05, 2.02it/s]
|
865 |
89%|βββββββββ | 9458/10682 [1:20:29<10:05, 2.02it/s]
|
866 |
89%|βββββββββ | 9459/10682 [1:20:30<10:57, 1.86it/s]
|
867 |
89%|βββββββββ | 9460/10682 [1:20:30<10:40, 1.91it/s]
|
868 |
89%|βββββββββ | 9461/10682 [1:20:31<10:29, 1.94it/s]
|
869 |
89%|ββββοΏ½οΏ½ββββ | 9462/10682 [1:20:31<10:21, 1.96it/s]
|
870 |
89%|βββββββββ | 9463/10682 [1:20:32<10:15, 1.98it/s]
|
871 |
89%|βββββββββ | 9464/10682 [1:20:32<10:10, 1.99it/s]
|
872 |
89%|βββββββββ | 9465/10682 [1:20:33<10:07, 2.00it/s]
|
873 |
89%|βββββββββ | 9466/10682 [1:20:33<10:05, 2.01it/s]
|
874 |
89%|βββββββββ | 9467/10682 [1:20:34<10:02, 2.02it/s]
|
875 |
89%|βββββββββ | 9468/10682 [1:20:34<10:01, 2.02it/s]
|
876 |
89%|βββββββββ | 9469/10682 [1:20:35<10:00, 2.02it/s]
|
877 |
89%|βββββββββ | 9470/10682 [1:20:35<09:59, 2.02it/s]
|
878 |
89%|βββββββββ | 9471/10682 [1:20:36<09:58, 2.02it/s]
|
879 |
89%|βββββββββ | 9472/10682 [1:20:36<09:58, 2.02it/s]
|
880 |
89%|βββββββββ | 9473/10682 [1:20:37<09:57, 2.02it/s]
|
881 |
89%|βββββββββ | 9474/10682 [1:20:37<09:56, 2.02it/s]
|
882 |
89%|βββββββββ | 9475/10682 [1:20:38<09:55, 2.03it/s]{'loss': 2.7382, 'grad_norm': 0.27711209654808044, 'learning_rate': 3.839705441646779e-05, 'epoch': 12.41}
|
883 |
|
884 |
+
|
885 |
89%|βββββββββ | 9475/10682 [1:20:38<09:55, 2.03it/s]
|
886 |
89%|βββββββββ | 9476/10682 [1:20:38<09:58, 2.02it/s]
|
887 |
89%|βββββββββ | 9477/10682 [1:20:39<09:56, 2.02it/s]
|
888 |
89%|βββββββββ | 9478/10682 [1:20:39<09:56, 2.02it/s]
|
889 |
89%|βββββββββ | 9479/10682 [1:20:40<09:55, 2.02it/s]
|
890 |
89%|βββββββββ | 9480/10682 [1:20:40<09:54, 2.02it/s]
|
891 |
89%|βββββββββ | 9481/10682 [1:20:41<09:53, 2.02it/s]
|
892 |
89%|βββββββββ | 9482/10682 [1:20:41<09:53, 2.02it/s]
|
893 |
89%|βββββββββ | 9483/10682 [1:20:42<09:53, 2.02it/s]
|
894 |
89%|βββββββββ | 9484/10682 [1:20:42<09:52, 2.02it/s]
|
895 |
89%|βββββββββ | 9485/10682 [1:20:43<09:51, 2.02it/s]
|
896 |
89%|βββββββββ | 9486/10682 [1:20:43<09:51, 2.02it/s]
|
897 |
89%|βββββββββ | 9487/10682 [1:20:44<09:50, 2.02it/s]
|
898 |
89%|βββββββββ | 9488/10682 [1:20:44<09:50, 2.02it/s]
|
899 |
89%|βββββββββ | 9489/10682 [1:20:45<09:49, 2.02it/s]
|
900 |
89%|βββββββββ | 9490/10682 [1:20:45<09:49, 2.02it/s]
|
901 |
89%|βββββββββ | 9491/10682 [1:20:46<09:48, 2.02it/s]
|
902 |
89%|βββββββββ | 9492/10682 [1:20:46<09:47, 2.02it/s]
|
903 |
89%|βββββββββ | 9493/10682 [1:20:47<09:46, 2.03it/s]
|
904 |
89%|βββββββββ | 9494/10682 [1:20:47<09:46, 2.02it/s]
|
905 |
89%|βββββββββ | 9495/10682 [1:20:48<09:46, 2.02it/s]
|
906 |
89%|βββββββββ | 9496/10682 [1:20:48<09:46, 2.02it/s]
|
907 |
89%|βββββββββ | 9497/10682 [1:20:49<09:45, 2.02it/s]
|
908 |
89%|βββββββββ | 9498/10682 [1:20:49<09:45, 2.02it/s]
|
909 |
89%|βββββββββ | 9499/10682 [1:20:50<09:44, 2.02it/s]
|
910 |
89%|βββββββββ | 9500/10682 [1:20:50<09:43, 2.02it/s]
|
911 |
|
912 |
+
|
913 |
89%|βββββββββ | 9500/10682 [1:20:50<09:43, 2.02it/s]
|
914 |
89%|βββββββββ | 9501/10682 [1:20:51<09:44, 2.02it/s]
|
915 |
89%|βββββββββ | 9502/10682 [1:20:51<09:43, 2.02it/s]
|
916 |
89%|βββββββββ | 9503/10682 [1:20:52<09:43, 2.02it/s]
|
917 |
89%|βββββββββ | 9504/10682 [1:20:52<09:42, 2.02it/s]
|
918 |
89%|βββββββββ | 9505/10682 [1:20:53<09:42, 2.02it/s]
|
919 |
89%|βββββββββ | 9506/10682 [1:20:53<09:41, 2.02it/s]
|
920 |
89%|βββββββββ | 9507/10682 [1:20:54<09:40, 2.02it/s]
|
921 |
89%|βββββββββ | 9508/10682 [1:20:54<09:39, 2.03it/s]
|
922 |
89%|βββββββββ | 9509/10682 [1:20:55<09:39, 2.02it/s]
|
923 |
89%|βββββββββ | 9510/10682 [1:20:55<09:38, 2.03it/s]
|
924 |
89%|βββββββββ | 9511/10682 [1:20:56<09:38, 2.02it/s]
|
925 |
89%|βββββββββ | 9512/10682 [1:20:56<09:37, 2.02it/s]
|
926 |
89%|βββββββββ | 9513/10682 [1:20:57<09:37, 2.02it/s]
|
927 |
89%|βββββββββ | 9514/10682 [1:20:57<09:37, 2.02it/s]
|
928 |
89%|βββββββββ | 9515/10682 [1:20:58<09:36, 2.02it/s]
|
929 |
89%|βββββββββ | 9516/10682 [1:20:58<09:36, 2.02it/s]
|
930 |
89%|βββββββββ | 9517/10682 [1:20:59<09:35, 2.02it/s]
|
931 |
89%|βββββββββ | 9518/10682 [1:20:59<09:35, 2.02it/s]
|
932 |
89%|βββββββββ | 9519/10682 [1:21:00<09:34, 2.03it/s]
|
933 |
89%|βββββββββ | 9520/10682 [1:21:00<09:34, 2.02it/s]
|
934 |
89%|βββββββββ | 9521/10682 [1:21:01<09:33, 2.02it/s]
|
935 |
89%|βββββββββ | 9522/10682 [1:21:01<09:33, 2.02it/s]
|
936 |
89%|βββββββββ | 9523/10682 [1:21:02<09:32, 2.02it/s]
|
937 |
89%|βββββββββ | 9524/10682 [1:21:02<09:32, 2.02it/s]
|
938 |
89%|βββββββββ | 9525/10682 [1:21:03<09:31, 2.02it/s]
|
939 |
|
940 |
+
|
941 |
89%|βββββββββ | 9525/10682 [1:21:03<09:31, 2.02it/s]
|
942 |
89%|βββββββββ | 9526/10682 [1:21:03<09:32, 2.02it/s]
|
943 |
89%|βββββββββ | 9527/10682 [1:21:03<09:31, 2.02it/s]
|
944 |
89%|βββββββββ | 9528/10682 [1:21:04<09:30, 2.02it/s]
|
945 |
89%|βββββββββ | 9529/10682 [1:21:04<09:29, 2.02it/s]
|
946 |
89%|βββββββββ | 9530/10682 [1:21:05<09:28, 2.02it/s]
|
947 |
89%|βββββββββ | 9531/10682 [1:21:05<09:28, 2.02it/s]
|
948 |
89%|βββββββββ | 9532/10682 [1:21:06<09:27, 2.03it/s]
|
949 |
89%|βββββββββ | 9533/10682 [1:21:06<09:27, 2.02it/s]
|
950 |
89%|βββββββββ | 9534/10682 [1:21:07<09:26, 2.02it/s]
|
951 |
89%|βββββββββ | 9535/10682 [1:21:07<09:26, 2.02it/s]
|
952 |
89%|βββββββββ | 9536/10682 [1:21:08<09:26, 2.02it/s]
|
953 |
89%|βββββββββ | 9537/10682 [1:21:08<09:26, 2.02it/s]
|
954 |
89%|βββββββββ | 9538/10682 [1:21:09<09:25, 2.02it/s]
|
955 |
89%|βββββββββ | 9539/10682 [1:21:09<09:25, 2.02it/s]
|
956 |
89%|βββββββββ | 9540/10682 [1:21:10<09:24, 2.02it/s]
|
957 |
89%|βββββββββ | 9541/10682 [1:21:10<09:24, 2.02it/s]
|
958 |
89%|βββββββββ | 9542/10682 [1:21:11<09:23, 2.02it/s]
|
959 |
89%|βββββββββ | 9543/10682 [1:21:11<09:23, 2.02it/s]
|
960 |
89%|βββββββββ | 9544/10682 [1:21:12<09:22, 2.02it/s]
|
961 |
89%|βββββββββ | 9545/10682 [1:21:12<09:21, 2.02it/s]
|
962 |
89%|βββββββββ | 9546/10682 [1:21:13<09:21, 2.02it/s]
|
963 |
89%|βββββββββ | 9547/10682 [1:21:13<09:20, 2.02it/s]
|
964 |
89%|βββββββββ | 9548/10682 [1:21:14<09:20, 2.02it/s]
|
965 |
89%|βββββββββ | 9549/10682 [1:21:14<09:20, 2.02it/s]
|
966 |
89%|βββββββββ | 9550/10682 [1:21:15<09:19, 2.02it/s]
|
967 |
|
968 |
+
|
969 |
89%|βββββββββ | 9550/10682 [1:21:15<09:19, 2.02it/s]
|
970 |
89%|βββββββββ | 9551/10682 [1:21:15<09:19, 2.02it/s]
|
971 |
89%|βββββββββ | 9552/10682 [1:21:16<09:18, 2.02it/s]
|
972 |
89%|βββββββββ | 9553/10682 [1:21:16<09:18, 2.02it/s]
|
973 |
89%|βββββββββ | 9554/10682 [1:21:17<09:17, 2.02it/s]
|
974 |
89%|βββββββββ | 9555/10682 [1:21:17<09:17, 2.02it/s]
|
975 |
89%|βββββββββ | 9556/10682 [1:21:18<09:16, 2.02it/s]
|
976 |
89%|βββββββββ | 9557/10682 [1:21:18<09:15, 2.03it/s]
|
977 |
89%|βββββββββ | 9558/10682 [1:21:19<09:15, 2.02it/s]
|
978 |
89%|βββββββββ | 9559/10682 [1:21:19<09:14, 2.03it/s]
|
979 |
89%|βββββββββ | 9560/10682 [1:21:20<09:13, 2.03it/s]
|
980 |
90%|βββββββββ | 9561/10682 [1:21:20<09:13, 2.02it/s]
|
981 |
90%|βββββββββ | 9562/10682 [1:21:21<09:12, 2.03it/s]
|
982 |
90%|βββββββββ | 9563/10682 [1:21:21<09:13, 2.02it/s]
|
983 |
90%|βββββββββ | 9564/10682 [1:21:22<09:12, 2.02it/s]
|
984 |
90%|βββββββββ | 9565/10682 [1:21:22<09:12, 2.02it/s]
|
985 |
90%|βββββββββ | 9566/10682 [1:21:23<09:11, 2.02it/s]
|
986 |
90%|βββββββββ | 9567/10682 [1:21:23<09:11, 2.02it/s]
|
987 |
90%|βββββββββ | 9568/10682 [1:21:24<09:10, 2.02it/s]
|
988 |
90%|βββββββββ | 9569/10682 [1:21:24<09:57, 1.86it/s]
|
989 |
90%|βββββββββ | 9570/10682 [1:21:25<09:43, 1.91it/s]
|
990 |
90%|βββββββββ | 9571/10682 [1:21:25<09:32, 1.94it/s]
|
991 |
90%|βββββββββ | 9572/10682 [1:21:26<09:25, 1.96it/s]
|
992 |
90%|βββββββββ | 9573/10682 [1:21:26<09:20, 1.98it/s]
|
993 |
90%|βββββββββ | 9574/10682 [1:21:27<09:15, 1.99it/s]
|
994 |
90%|βββββββββ | 9575/10682 [1:21:27<09:13, 2.00it/s]
|
995 |
|
996 |
+
|
997 |
90%|βββββββββ | 9575/10682 [1:21:27<09:13, 2.00it/s]
|
998 |
90%|βββββββββ | 9576/10682 [1:21:28<09:13, 2.00it/s]
|
999 |
90%|βββββββββ | 9577/10682 [1:21:28<09:10, 2.01it/s]
|
1000 |
90%|βββββββββ | 9578/10682 [1:21:29<09:08, 2.01it/s]
|
1001 |
90%|βββββββββ | 9579/10682 [1:21:29<09:07, 2.02it/s]
|
1002 |
90%|βββββββββ | 9580/10682 [1:21:30<09:06, 2.02it/s]
|
1003 |
90%|βββββββββ | 9581/10682 [1:21:30<09:05, 2.02it/s]
|
1004 |
90%|βββββββββ | 9582/10682 [1:21:31<09:05, 2.02it/s]
|
1005 |
90%|βββββββββ | 9583/10682 [1:21:31<09:03, 2.02it/s]
|
1006 |
90%|βββββββββ | 9584/10682 [1:21:32<09:03, 2.02it/s]
|
1007 |
90%|βββββββββ | 9585/10682 [1:21:32<09:01, 2.02it/s]
|
1008 |
90%|βββββββββ | 9586/10682 [1:21:33<09:01, 2.02it/s]
|
1009 |
90%|βββββββββ | 9587/10682 [1:21:33<09:00, 2.03it/s]
|
1010 |
90%|βββββββββ | 9588/10682 [1:21:34<09:00, 2.02it/s]
|
1011 |
90%|βββββββββ | 9589/10682 [1:21:34<08:59, 2.02it/s]
|
1012 |
90%|βββββββββ | 9590/10682 [1:21:35<08:59, 2.02it/s]
|
1013 |
90%|βββββββββ | 9591/10682 [1:21:35<08:58, 2.03it/s]
|
1014 |
90%|βββββββββ | 9592/10682 [1:21:36<08:58, 2.03it/s]
|
1015 |
90%|βββββββββ | 9593/10682 [1:21:36<08:57, 2.03it/s]
|
1016 |
90%|βββββββββ | 9594/10682 [1:21:37<08:57, 2.03it/s]
|
1017 |
90%|βββββββββ | 9595/10682 [1:21:37<08:57, 2.02it/s]
|
1018 |
90%|βββββββββ | 9596/10682 [1:21:38<08:56, 2.02it/s]
|
1019 |
90%|βββββββββ | 9597/10682 [1:21:38<08:56, 2.02it/s]
|
1020 |
90%|βββββββββ | 9598/10682 [1:21:39<08:55, 2.02it/s]
|
1021 |
90%|βββββββββ | 9599/10682 [1:21:39<08:55, 2.02it/s]
|
1022 |
90%|βββββββββ | 9600/10682 [1:21:40<08:55, 2.02it/s]
|
1023 |
|
1024 |
+
|
1025 |
90%|βββββββββ | 9600/10682 [1:21:40<08:55, 2.02it/s]
|
1026 |
90%|βββββββββ | 9601/10682 [1:21:40<08:54, 2.02it/s]
|
1027 |
90%|βββββββββ | 9602/10682 [1:21:41<08:54, 2.02it/s]
|
1028 |
90%|βββββββββ | 9603/10682 [1:21:41<08:53, 2.02it/s]
|
1029 |
90%|βββββββββ | 9604/10682 [1:21:42<08:52, 2.02it/s]
|
1030 |
90%|βββββββββ | 9605/10682 [1:21:42<08:52, 2.02it/s]
|
1031 |
90%|βββββββββ | 9606/10682 [1:21:43<08:52, 2.02it/s]
|
1032 |
90%|βββββββββ | 9607/10682 [1:21:43<08:51, 2.02it/s]
|
1033 |
90%|βββββββββ | 9608/10682 [1:21:44<08:51, 2.02it/s]
|
1034 |
90%|βββββββββ | 9609/10682 [1:21:44<08:50, 2.02it/s]
|
1035 |
90%|βββββββββ | 9610/10682 [1:21:45<08:49, 2.03it/s]
|
1036 |
90%|βββββββββ | 9611/10682 [1:21:45<08:49, 2.02it/s]
|
1037 |
90%|βββββββββ | 9612/10682 [1:21:46<08:48, 2.03it/s]
|
1038 |
90%|βββββββββ | 9613/10682 [1:21:46<08:48, 2.02it/s]
|
1039 |
90%|βββββββββ | 9614/10682 [1:21:47<08:46, 2.03it/s]
|
1040 |
90%|βββββββββ | 9615/10682 [1:21:47<08:46, 2.02it/s]
|
1041 |
90%|βββββββββ | 9616/10682 [1:21:48<08:46, 2.03it/s]
|
1042 |
90%|βββββββββ | 9617/10682 [1:21:48<08:46, 2.02it/s]
|
1043 |
90%|βββββββββ | 9618/10682 [1:21:49<08:45, 2.02it/s]
|
1044 |
90%|βββββββββ | 9619/10682 [1:21:49<08:45, 2.02it/s]
|
1045 |
90%|βββββββββ | 9620/10682 [1:21:50<08:44, 2.02it/s]
|
1046 |
90%|βββββββββ | 9621/10682 [1:21:50<08:44, 2.02it/s]
|
1047 |
90%|βββββββββ | 9622/10682 [1:21:51<08:43, 2.02it/s]
|
1048 |
90%|βββββββββ | 9623/10682 [1:21:51<08:43, 2.02it/s]
|
1049 |
90%|βββββββββ | 9624/10682 [1:21:52<08:43, 2.02it/s]
|
1050 |
90%|βββββββββ | 9625/10682 [1:21:52<08:42, 2.02it/s]{'loss': 2.7489, 'grad_norm': 0.2770747244358063, 'learning_rate': 2.9535819277506203e-05, 'epoch': 12.61}
|
1051 |
|
1052 |
+
|
1053 |
90%|βββββββββ | 9625/10682 [1:21:52<08:42, 2.02it/s]
|
1054 |
90%|βββββββββ | 9626/10682 [1:21:53<08:42, 2.02it/s]
|
1055 |
90%|βββββββββ | 9627/10682 [1:21:53<08:42, 2.02it/s]
|
1056 |
90%|βββββββββ | 9628/10682 [1:21:54<08:41, 2.02it/s]
|
1057 |
90%|βββββββββ | 9629/10682 [1:21:54<08:40, 2.02it/s]
|
1058 |
90%|βββββββββ | 9630/10682 [1:21:55<08:39, 2.02it/s]
|
1059 |
90%|βββββββββ | 9631/10682 [1:21:55<08:39, 2.02it/s]
|
1060 |
90%|βββββββββ | 9632/10682 [1:21:56<08:38, 2.02it/s]
|
1061 |
90%|βββββββββ | 9633/10682 [1:21:56<08:38, 2.02it/s]
|
1062 |
90%|βββββββββ | 9634/10682 [1:21:57<08:37, 2.02it/s]
|
1063 |
90%|βββββββββ | 9635/10682 [1:21:57<08:37, 2.02it/s]
|
1064 |
90%|βββββββββ | 9636/10682 [1:21:58<08:36, 2.02it/s]
|
1065 |
90%|βββββββββ | 9637/10682 [1:21:58<08:36, 2.02it/s]
|
1066 |
90%|βββββββββ | 9638/10682 [1:21:59<08:35, 2.02it/s]
|
1067 |
90%|βββββββββ | 9639/10682 [1:21:59<08:35, 2.02it/s]
|
1068 |
90%|βββββββββ | 9640/10682 [1:21:59<08:34, 2.02it/s]
|
1069 |
90%|βββββββββ | 9641/10682 [1:22:00<08:34, 2.02it/s]
|
1070 |
90%|βββββββββ | 9642/10682 [1:22:00<08:34, 2.02it/s]
|
1071 |
90%|βββββββββ | 9643/10682 [1:22:01<08:33, 2.02it/s]
|
1072 |
90%|βββββββββ | 9644/10682 [1:22:01<08:33, 2.02it/s]
|
1073 |
90%|βββββββββ | 9645/10682 [1:22:02<08:32, 2.02it/s]
|
1074 |
90%|βββββββββ | 9646/10682 [1:22:02<08:32, 2.02it/s]
|
1075 |
90%|βββββββββ | 9647/10682 [1:22:03<08:31, 2.02it/s]
|
1076 |
90%|βββββββββ | 9648/10682 [1:22:03<08:31, 2.02it/s]
|
1077 |
90%|βββββββββ | 9649/10682 [1:22:04<08:30, 2.02it/s]
|
1078 |
90%|βββββββββ | 9650/10682 [1:22:04<08:29, 2.02it/s]{'loss': 2.7507, 'grad_norm': 0.2786267101764679, 'learning_rate': 2.8168304138088295e-05, 'epoch': 12.64}
|
1079 |
|
1080 |
+
|
1081 |
90%|βββββββββ | 9650/10682 [1:22:04<08:29, 2.02it/s]
|
1082 |
90%|βββββββββ | 9651/10682 [1:22:05<08:29, 2.02it/s]
|
1083 |
90%|βββββββββ | 9652/10682 [1:22:05<08:29, 2.02it/s]
|
1084 |
90%|βββββββββ | 9653/10682 [1:22:06<08:29, 2.02it/s]
|
1085 |
90%|βββββββββ | 9654/10682 [1:22:06<08:28, 2.02it/s]
|
1086 |
90%|βββββββββ | 9655/10682 [1:22:07<08:28, 2.02it/s]
|
1087 |
90%|βββββββββ | 9656/10682 [1:22:07<08:27, 2.02it/s]
|
1088 |
90%|βββββββββ | 9657/10682 [1:22:08<08:27, 2.02it/s]
|
1089 |
90%|βββββββββ | 9658/10682 [1:22:08<08:26, 2.02it/s]
|
1090 |
90%|βββββββββ | 9659/10682 [1:22:09<08:26, 2.02it/s]
|
1091 |
90%|βββββββββ | 9660/10682 [1:22:09<08:25, 2.02it/s]
|
1092 |
90%|βββββββββ | 9661/10682 [1:22:10<08:25, 2.02it/s]
|
1093 |
90%|βββββββββ | 9662/10682 [1:22:10<08:24, 2.02it/s]
|
1094 |
90%|βββββββββ | 9663/10682 [1:22:11<08:24, 2.02it/s]
|
1095 |
90%|βββββββββ | 9664/10682 [1:22:11<08:23, 2.02it/s]
|
1096 |
90%|βββββββββ | 9665/10682 [1:22:12<08:23, 2.02it/s]
|
1097 |
90%|βββββββββ | 9666/10682 [1:22:12<08:23, 2.02it/s]
|
1098 |
90%|βββββββββ | 9667/10682 [1:22:13<08:22, 2.02it/s]
|
1099 |
91%|βββββββββ | 9668/10682 [1:22:13<08:21, 2.02it/s]
|
1100 |
91%|βββββββββ | 9669/10682 [1:22:14<08:20, 2.02it/s]
|
1101 |
91%|βββββββββ | 9670/10682 [1:22:14<08:19, 2.02it/s]
|
1102 |
91%|βββββββββ | 9671/10682 [1:22:15<08:19, 2.02it/s]
|
1103 |
91%|βββββββββ | 9672/10682 [1:22:15<08:18, 2.03it/s]
|
1104 |
91%|βββββββββ | 9673/10682 [1:22:16<08:18, 2.02it/s]
|
1105 |
91%|βββββββββ | 9674/10682 [1:22:16<08:17, 2.03it/s]
|
1106 |
91%|βββββββββ | 9675/10682 [1:22:17<08:17, 2.02it/s]
|
1107 |
{'loss': 2.7574, 'grad_norm': 0.27770131826400757, 'learning_rate': 2.6832284358471516e-05, 'epoch': 12.67}
|
1108 |
+
|
1109 |
91%|βββββββββ | 9675/10682 [1:22:17<08:17, 2.02it/s]
|
1110 |
91%|βββββββββ | 9676/10682 [1:22:17<08:16, 2.02it/s]
|
1111 |
91%|βββββββββ | 9677/10682 [1:22:18<08:16, 2.02it/s]
|
1112 |
91%|βββββββββ | 9678/10682 [1:22:18<08:16, 2.02it/s]
|
1113 |
91%|βββββββββ | 9679/10682 [1:22:19<08:15, 2.02it/s]
|
1114 |
91%|βββββββββ | 9680/10682 [1:22:19<08:15, 2.02it/s]
|
1115 |
91%|βββββββββ | 9681/10682 [1:22:20<08:14, 2.02it/s]
|
1116 |
91%|βββββββββ | 9682/10682 [1:22:20<08:14, 2.02it/s]
|
1117 |
91%|βββββββββ | 9683/10682 [1:22:21<08:13, 2.02it/s]
|
1118 |
91%|βββββββββ | 9684/10682 [1:22:21<08:13, 2.02it/s]
|
1119 |
91%|βββββββββ | 9685/10682 [1:22:22<08:12, 2.02it/s]
|
1120 |
91%|βββββββββ | 9686/10682 [1:22:22<08:12, 2.02it/s]
|
1121 |
91%|βββββββββ | 9687/10682 [1:22:23<08:11, 2.03it/s]
|
1122 |
91%|βββββββββ | 9688/10682 [1:22:23<08:11, 2.02it/s]
|
1123 |
91%|βββββββββ | 9689/10682 [1:22:24<08:10, 2.02it/s]
|
1124 |
91%|βββββββββ | 9690/10682 [1:22:24<08:10, 2.02it/s]
|
1125 |
91%|βββββββββ | 9691/10682 [1:22:25<08:09, 2.02it/s]
|
1126 |
91%|βββββββββ | 9692/10682 [1:22:25<08:09, 2.02it/s]
|
1127 |
91%|βββββββββ | 9693/10682 [1:22:26<08:08, 2.02it/s]
|
1128 |
91%|βββββββββ | 9694/10682 [1:22:26<08:08, 2.02it/s]
|
1129 |
91%|βββββββββ | 9695/10682 [1:22:27<08:08, 2.02it/s]
|
1130 |
91%|βββββββββ | 9696/10682 [1:22:27<08:07, 2.02it/s]
|
1131 |
91%|βββββββββ | 9697/10682 [1:22:28<08:06, 2.02it/s]
|
1132 |
91%|βββββββββ | 9698/10682 [1:22:28<08:06, 2.02it/s]
|
1133 |
91%|βββββββββ | 9699/10682 [1:22:29<08:05, 2.02it/s]
|
1134 |
91%|βββββββββ | 9700/10682 [1:22:29<08:05, 2.02it/s]
|
1135 |
{'loss': 2.749, 'grad_norm': 0.27603113651275635, 'learning_rate': 2.5527849119658387e-05, 'epoch': 12.7}
|
1136 |
+
|
1137 |
91%|βββββββββ | 9700/10682 [1:22:29<08:05, 2.02it/s]
|
1138 |
91%|βββββββββ | 9701/10682 [1:22:30<08:05, 2.02it/s]
|
1139 |
91%|βββββββββ | 9702/10682 [1:22:30<08:04, 2.02it/s]
|
1140 |
91%|βββββββββ | 9703/10682 [1:22:31<08:04, 2.02it/s]
|
1141 |
91%|βββββββββ | 9704/10682 [1:22:31<08:03, 2.02it/s]
|
1142 |
91%|βββββββββ | 9705/10682 [1:22:32<08:03, 2.02it/s]
|
1143 |
91%|βββββββββ | 9706/10682 [1:22:32<08:02, 2.02it/s]
|
1144 |
91%|βββββββββ | 9707/10682 [1:22:33<08:02, 2.02it/s]
|
1145 |
91%|βββββββββ | 9708/10682 [1:22:33<08:01, 2.02it/s]
|
1146 |
91%|βββββββββ | 9709/10682 [1:22:34<08:01, 2.02it/s]
|
1147 |
91%|βββββββοΏ½οΏ½οΏ½β | 9710/10682 [1:22:34<08:00, 2.02it/s]
|
1148 |
91%|βββββββββ | 9711/10682 [1:22:35<08:00, 2.02it/s]
|
1149 |
91%|βββββββββ | 9712/10682 [1:22:35<07:59, 2.02it/s]
|
1150 |
91%|βββββββββ | 9713/10682 [1:22:36<07:58, 2.02it/s]
|
1151 |
91%|βββββββββ | 9714/10682 [1:22:36<07:58, 2.02it/s]
|
1152 |
91%|βββββββββ | 9715/10682 [1:22:37<07:57, 2.02it/s]
|
1153 |
91%|βββββββββ | 9716/10682 [1:22:37<07:57, 2.02it/s]
|
1154 |
91%|βββββββββ | 9717/10682 [1:22:38<07:56, 2.02it/s]
|
1155 |
91%|βββββββββ | 9718/10682 [1:22:38<07:56, 2.02it/s]
|
1156 |
91%|βββββββββ | 9719/10682 [1:22:39<07:55, 2.02it/s]
|
1157 |
91%|βββββββββ | 9720/10682 [1:22:39<07:54, 2.03it/s]
|
1158 |
91%|βββββββββ | 9721/10682 [1:22:40<07:54, 2.03it/s]
|
1159 |
91%|βββββββββ | 9722/10682 [1:22:40<07:54, 2.02it/s]
|
1160 |
91%|βββββββββ | 9723/10682 [1:22:41<07:53, 2.02it/s]
|
1161 |
91%|βββββββββ | 9724/10682 [1:22:41<07:53, 2.02it/s]
|
1162 |
91%|βββββββββ | 9725/10682 [1:22:42<07:52, 2.03it/s]
|
1163 |
{'loss': 2.756, 'grad_norm': 0.27851659059524536, 'learning_rate': 2.4255085494343522e-05, 'epoch': 12.74}
|
1164 |
+
|
1165 |
91%|βββββββββ | 9725/10682 [1:22:42<07:52, 2.03it/s]
|
1166 |
91%|βββββββββ | 9726/10682 [1:22:42<07:53, 2.02it/s]
|
1167 |
91%|βββββββββ | 9727/10682 [1:22:42<07:51, 2.02it/s]
|
1168 |
91%|βββββββββ | 9728/10682 [1:22:43<07:51, 2.02it/s]
|
1169 |
91%|βββββββββ | 9729/10682 [1:22:43<07:51, 2.02it/s]
|
1170 |
91%|βββββββββ | 9730/10682 [1:22:44<07:50, 2.02it/s]
|
1171 |
91%|βββββββββ | 9731/10682 [1:22:44<07:50, 2.02it/s]
|
1172 |
91%|βββββββββ | 9732/10682 [1:22:45<07:49, 2.02it/s]
|
1173 |
91%|βββββββββ | 9733/10682 [1:22:45<07:48, 2.02it/s]
|
1174 |
91%|βββββββββ | 9734/10682 [1:22:46<07:48, 2.02it/s]
|
1175 |
91%|βββββββββ | 9735/10682 [1:22:46<07:48, 2.02it/s]
|
1176 |
91%|βββββββββ | 9736/10682 [1:22:47<07:47, 2.02it/s]
|
1177 |
91%|βββββββββ | 9737/10682 [1:22:47<07:47, 2.02it/s]
|
1178 |
91%|βββββββββ | 9738/10682 [1:22:48<07:46, 2.02it/s]
|
1179 |
91%|βββββββββ | 9739/10682 [1:22:48<07:46, 2.02it/s]
|
1180 |
91%|βββββββββ | 9740/10682 [1:22:49<07:45, 2.02it/s]
|
1181 |
91%|βββββββββ | 9741/10682 [1:22:49<07:45, 2.02it/s]
|
1182 |
91%|βββββββββ | 9742/10682 [1:22:50<07:44, 2.02it/s]
|
1183 |
91%|βββββββββ | 9743/10682 [1:22:50<07:44, 2.02it/s]
|
1184 |
91%|βββββββββ | 9744/10682 [1:22:51<07:44, 2.02it/s]
|
1185 |
91%|βββββββββ | 9745/10682 [1:22:51<07:43, 2.02it/s]
|
1186 |
91%|βββββββββ | 9746/10682 [1:22:52<07:42, 2.02it/s]
|
1187 |
91%|βββββββββ | 9747/10682 [1:22:52<07:42, 2.02it/s]
|
1188 |
91%|ββββββββββ| 9748/10682 [1:22:53<07:42, 2.02it/s]
|
1189 |
91%|ββββββββββ| 9749/10682 [1:22:53<07:41, 2.02it/s]
|
1190 |
91%|ββββββββββ| 9750/10682 [1:22:54<07:40, 2.02it/s]
|
1191 |
{'loss': 2.7534, 'grad_norm': 0.2752934694290161, 'learning_rate': 2.301407844110154e-05, 'epoch': 12.77}
|
1192 |
+
|
1193 |
91%|ββββββββββ| 9750/10682 [1:22:54<07:40, 2.02it/s]
|
1194 |
91%|ββββββββββ| 9751/10682 [1:22:54<07:40, 2.02it/s]
|
1195 |
91%|ββββββββββ| 9752/10682 [1:22:55<07:39, 2.02it/s]
|
1196 |
91%|ββββββββββ| 9753/10682 [1:22:55<07:39, 2.02it/s]
|
1197 |
91%|ββββββββββ| 9754/10682 [1:22:56<07:39, 2.02it/s]
|
1198 |
91%|ββββββββββ| 9755/10682 [1:22:56<07:38, 2.02it/s]
|
1199 |
91%|ββββββββββ| 9756/10682 [1:22:57<07:38, 2.02it/s]
|
1200 |
91%|ββββββββββ| 9757/10682 [1:22:57<07:37, 2.02it/s]
|
1201 |
91%|ββββββββββ| 9758/10682 [1:22:58<07:37, 2.02it/s]
|
1202 |
91%|ββββββββββ| 9759/10682 [1:22:58<07:36, 2.02it/s]
|
1203 |
91%|ββββββββββ| 9760/10682 [1:22:59<07:36, 2.02it/s]
|
1204 |
91%|ββββββββββ| 9761/10682 [1:22:59<07:35, 2.02it/s]
|
1205 |
91%|ββββββββββ| 9762/10682 [1:23:00<07:34, 2.02it/s]
|
1206 |
91%|ββββββββββ| 9763/10682 [1:23:00<07:34, 2.02it/s]
|
1207 |
91%|ββββββββββ| 9764/10682 [1:23:01<07:33, 2.02it/s]
|
1208 |
91%|ββββββββββ| 9765/10682 [1:23:01<07:33, 2.02it/s]
|
1209 |
91%|ββββββββββ| 9766/10682 [1:23:02<07:32, 2.02it/s]
|
1210 |
91%|ββββββββββ| 9767/10682 [1:23:02<07:32, 2.02it/s]
|
1211 |
91%|ββββββββββ| 9768/10682 [1:23:03<07:32, 2.02it/s]
|
1212 |
91%|ββββββββββ| 9769/10682 [1:23:03<07:31, 2.02it/s]
|
1213 |
91%|ββββββββββ| 9770/10682 [1:23:04<07:31, 2.02it/s]
|
1214 |
91%|ββββββββββ| 9771/10682 [1:23:04<07:30, 2.02it/s]
|
1215 |
91%|ββββββββββ| 9772/10682 [1:23:05<07:30, 2.02it/s]
|
1216 |
91%|ββββββββββ| 9773/10682 [1:23:05<07:29, 2.02it/s]
|
1217 |
91%|ββββββββββ| 9774/10682 [1:23:06<07:28, 2.02it/s]
|
1218 |
92%|ββββββββββ| 9775/10682 [1:23:06<07:28, 2.02it/s]
|
1219 |
{'loss': 2.7569, 'grad_norm': 0.27959272265434265, 'learning_rate': 2.1804910798715826e-05, 'epoch': 12.8}
|
1220 |
+
|
1221 |
92%|ββββββββββ| 9775/10682 [1:23:06<07:28, 2.02it/s]
|
1222 |
92%|ββββββββββ| 9776/10682 [1:23:07<07:28, 2.02it/s]
|
1223 |
92%|ββββββββββ| 9777/10682 [1:23:07<07:27, 2.02it/s]
|
1224 |
92%|ββββββββββ| 9778/10682 [1:23:08<07:27, 2.02it/s]
|
1225 |
92%|ββββββββββ| 9779/10682 [1:23:08<07:26, 2.02it/s]
|
1226 |
92%|ββββββββββ| 9780/10682 [1:23:09<07:25, 2.02it/s]
|
1227 |
92%|ββββββββββ| 9781/10682 [1:23:09<07:25, 2.02it/s]
|
1228 |
92%|ββββββββββ| 9782/10682 [1:23:10<07:25, 2.02it/s]
|
1229 |
92%|ββββββββββ| 9783/10682 [1:23:10<07:24, 2.02it/s]
|
1230 |
92%|ββββββββββ| 9784/10682 [1:23:11<07:24, 2.02it/s]
|
1231 |
92%|ββββββββββ| 9785/10682 [1:23:11<07:23, 2.02it/s]
|
1232 |
92%|ββββββββββ| 9786/10682 [1:23:12<07:23, 2.02it/s]
|
1233 |
92%|ββββββββββ| 9787/10682 [1:23:12<07:22, 2.02it/s]
|
1234 |
92%|ββββββββββ| 9788/10682 [1:23:13<07:21, 2.02it/s]
|
1235 |
92%|ββββββββββ| 9789/10682 [1:23:13<07:21, 2.02it/s]
|
1236 |
92%|ββββββββββ| 9790/10682 [1:23:14<07:20, 2.03it/s]
|
1237 |
92%|ββββββββββ| 9791/10682 [1:23:14<07:20, 2.02it/s]
|
1238 |
92%|ββββββββββ| 9792/10682 [1:23:15<07:19, 2.02it/s]
|
1239 |
92%|ββββββββββ| 9793/10682 [1:23:15<07:19, 2.02it/s]
|
1240 |
92%|ββββββββββ| 9794/10682 [1:23:16<07:18, 2.02it/s]
|
1241 |
92%|ββββββββββ| 9795/10682 [1:23:16<07:17, 2.03it/s]
|
1242 |
92%|ββββββββββ| 9796/10682 [1:23:17<07:17, 2.02it/s]
|
1243 |
92%|ββββββββββ| 9797/10682 [1:23:17<07:16, 2.03it/s]
|
1244 |
92%|ββββββββββ| 9798/10682 [1:23:18<07:16, 2.03it/s]
|
1245 |
92%|ββββββββββ| 9799/10682 [1:23:18<07:15, 2.03it/s]
|
1246 |
92%|ββββββββββ| 9800/10682 [1:23:19<07:15, 2.02it/s]
|
1247 |
|
1248 |
+
|
1249 |
92%|ββββββββββ| 9800/10682 [1:23:19<07:15, 2.02it/s]
|
1250 |
92%|ββββββββββ| 9801/10682 [1:23:19<07:17, 2.01it/s]
|
1251 |
92%|ββββββββββ| 9802/10682 [1:23:20<07:16, 2.02it/s]
|
1252 |
92%|ββββββββββ| 9803/10682 [1:23:20<07:15, 2.02it/s]
|
1253 |
92%|ββββββββββ| 9804/10682 [1:23:21<07:14, 2.02it/s]
|
1254 |
92%|ββββββββββ| 9805/10682 [1:23:21<07:14, 2.02it/s]
|
1255 |
92%|ββββββββββ| 9806/10682 [1:23:22<07:13, 2.02it/s]
|
1256 |
92%|ββββββββββ| 9807/10682 [1:23:22<07:12, 2.02it/s]
|
1257 |
92%|ββββββββββ| 9808/10682 [1:23:23<07:12, 2.02it/s]
|
1258 |
92%|ββββββββββ| 9809/10682 [1:23:23<07:11, 2.02it/s]
|
1259 |
92%|ββββββββββ| 9810/10682 [1:23:24<07:11, 2.02it/s]
|
1260 |
92%|ββββββββββ| 9811/10682 [1:23:24<07:11, 2.02it/s]
|
1261 |
92%|ββββββββββ| 9812/10682 [1:23:25<07:10, 2.02it/s]
|
1262 |
92%|ββββββββββ| 9813/10682 [1:23:25<07:09, 2.02it/s]
|
1263 |
92%|ββββββββββ| 9814/10682 [1:23:26<07:09, 2.02it/s]
|
1264 |
92%|ββββββββββ| 9815/10682 [1:23:26<07:08, 2.02it/s]
|
1265 |
92%|ββββββββββ| 9816/10682 [1:23:27<07:08, 2.02it/s]
|
1266 |
92%|ββββββββββ| 9817/10682 [1:23:27<07:07, 2.02it/s]
|
1267 |
92%|ββββββββββ| 9818/10682 [1:23:28<07:07, 2.02it/s]
|
1268 |
92%|ββββββββββ| 9819/10682 [1:23:28<07:06, 2.02it/s]
|
1269 |
92%|ββββββββββ| 9820/10682 [1:23:28<07:06, 2.02it/s]
|
1270 |
92%|ββββββββββ| 9821/10682 [1:23:29<07:05, 2.02it/s]
|
1271 |
92%|ββββββββββ| 9822/10682 [1:23:29<07:05, 2.02it/s]
|
1272 |
92%|ββββββββββ| 9823/10682 [1:23:30<07:04, 2.02it/s]
|
1273 |
92%|ββββββββββ| 9824/10682 [1:23:30<07:04, 2.02it/s]
|
1274 |
92%|ββββββββββ| 9825/10682 [1:23:31<07:03, 2.02it/s]
|
1275 |
|
1276 |
+
|
1277 |
92%|ββββββββββ| 9825/10682 [1:23:31<07:03, 2.02it/s]
|
1278 |
92%|ββββββββββ| 9826/10682 [1:23:31<07:03, 2.02it/s]
|
1279 |
92%|ββββββββββ| 9827/10682 [1:23:32<07:02, 2.02it/s]
|
1280 |
92%|ββββββββββ| 9828/10682 [1:23:32<07:02, 2.02it/s]
|
1281 |
92%|ββββββββββ| 9829/10682 [1:23:33<07:01, 2.02it/s]
|
1282 |
92%|ββββββββββ| 9830/10682 [1:23:33<07:01, 2.02it/s]
|
1283 |
92%|ββββββββββ| 9831/10682 [1:23:34<07:00, 2.02it/s]
|
1284 |
92%|ββββββββββ| 9832/10682 [1:23:34<06:59, 2.02it/s]
|
1285 |
92%|ββββββββββ| 9833/10682 [1:23:35<06:59, 2.02it/s]
|
1286 |
92%|ββββββββββ| 9834/10682 [1:23:35<06:58, 2.03it/s]
|
1287 |
92%|ββββββββββ| 9835/10682 [1:23:36<06:58, 2.02it/s]
|
1288 |
92%|ββββββββββ| 9836/10682 [1:23:36<06:57, 2.02it/s]
|
1289 |
92%|ββββββββββ| 9837/10682 [1:23:37<06:57, 2.02it/s]
|
1290 |
92%|ββββββββββ| 9838/10682 [1:23:37<06:57, 2.02it/s]
|
1291 |
92%|ββββββββββ| 9839/10682 [1:23:38<06:56, 2.02it/s]
|
1292 |
92%|ββββββββββ| 9840/10682 [1:23:38<06:56, 2.02it/s]
|
1293 |
92%|ββββββββββ| 9841/10682 [1:23:39<06:55, 2.02it/s]
|
1294 |
92%|ββββββββββ| 9842/10682 [1:23:39<06:55, 2.02it/s]
|
1295 |
92%|ββββββββββ| 9843/10682 [1:23:40<06:54, 2.02it/s]
|
1296 |
92%|ββββββββββ| 9844/10682 [1:23:40<06:54, 2.02it/s]
|
1297 |
92%|ββββββββββ| 9845/10682 [1:23:41<06:54, 2.02it/s]
|
1298 |
92%|ββββββββββ| 9846/10682 [1:23:41<06:54, 2.02it/s]
|
1299 |
92%|ββββββββββ| 9847/10682 [1:23:42<06:53, 2.02it/s]
|
1300 |
92%|ββββββββββ| 9848/10682 [1:23:42<06:52, 2.02it/s]
|
1301 |
92%|ββββββββββ| 9849/10682 [1:23:43<06:52, 2.02it/s]
|
1302 |
92%|ββββββββββ| 9850/10682 [1:23:43<06:51, 2.02it/s]{'loss': 2.7634, 'grad_norm': 0.27715760469436646, 'learning_rate': 1.8369240812535104e-05, 'epoch': 12.9}
|
1303 |
|
1304 |
+
|
1305 |
92%|ββββββββββ| 9850/10682 [1:23:43<06:51, 2.02it/s]
|
1306 |
92%|ββββββββββ| 9851/10682 [1:23:44<06:51, 2.02it/s]
|
1307 |
92%|ββββββββββ| 9852/10682 [1:23:44<06:51, 2.02it/s]
|
1308 |
92%|ββββββββββ| 9853/10682 [1:23:45<06:50, 2.02it/s]
|
1309 |
92%|ββββββββββ| 9854/10682 [1:23:45<06:50, 2.02it/s]
|
1310 |
92%|ββββββββββ| 9855/10682 [1:23:46<06:49, 2.02it/s]
|
1311 |
92%|ββββββββββ| 9856/10682 [1:23:46<06:49, 2.02it/s]
|
1312 |
92%|ββββββββββ| 9857/10682 [1:23:47<06:47, 2.02it/s]
|
1313 |
92%|ββββββββββ| 9858/10682 [1:23:47<06:47, 2.02it/s]
|
1314 |
92%|ββββββββββ| 9859/10682 [1:23:48<06:46, 2.02it/s]
|
1315 |
92%|ββββββββββ| 9860/10682 [1:23:48<06:46, 2.02it/s]
|
1316 |
92%|ββββββββββ| 9861/10682 [1:23:49<06:45, 2.02it/s]
|
1317 |
92%|ββββββββββ| 9862/10682 [1:23:49<06:45, 2.02it/s]
|
1318 |
92%|ββββββββββ| 9863/10682 [1:23:50<06:44, 2.02it/s]
|
1319 |
92%|ββββββββββ| 9864/10682 [1:23:50<06:44, 2.02it/s]
|
1320 |
92%|ββββββββββ| 9865/10682 [1:23:51<06:43, 2.02it/s]
|
1321 |
92%|ββββββββββ| 9866/10682 [1:23:51<06:43, 2.02it/s]
|
1322 |
92%|ββββββββββ| 9867/10682 [1:23:52<06:43, 2.02it/s]
|
1323 |
92%|ββββββββββ| 9868/10682 [1:23:52<06:42, 2.02it/s]
|
1324 |
92%|ββββββββββ| 9869/10682 [1:23:53<06:42, 2.02it/s]
|
1325 |
92%|ββββββββββ| 9870/10682 [1:23:53<06:41, 2.02it/s]
|
1326 |
92%|ββββββββββ| 9871/10682 [1:23:54<06:40, 2.02it/s]
|
1327 |
92%|ββββββββββ| 9872/10682 [1:23:54<06:40, 2.02it/s]
|
1328 |
92%|ββββββββββ| 9873/10682 [1:23:55<06:39, 2.02it/s]
|
1329 |
92%|ββββββββββ| 9874/10682 [1:23:55<06:39, 2.02it/s]
|
1330 |
92%|ββββββββββ| 9875/10682 [1:23:56<06:38, 2.02it/s]
|
1331 |
|
1332 |
+
|
1333 |
92%|ββββββββββ| 9875/10682 [1:23:56<06:38, 2.02it/s]
|
1334 |
92%|ββββββββββ| 9876/10682 [1:23:56<06:38, 2.02it/s]
|
1335 |
92%|ββββββββββ| 9877/10682 [1:23:57<06:38, 2.02it/s]
|
1336 |
92%|ββββββββββ| 9878/10682 [1:23:57<06:38, 2.02it/s]
|
1337 |
92%|ββββββββββ| 9879/10682 [1:23:58<06:37, 2.02it/s]
|
1338 |
92%|ββββββββββ| 9880/10682 [1:23:58<06:36, 2.02it/s]
|
1339 |
93%|ββββββββββ| 9881/10682 [1:23:59<06:35, 2.02it/s]
|
1340 |
93%|ββββββββββ| 9882/10682 [1:23:59<06:35, 2.02it/s]
|
1341 |
93%|ββββββββββ| 9883/10682 [1:24:00<06:35, 2.02it/s]
|
1342 |
93%|ββββββββββ| 9884/10682 [1:24:00<06:34, 2.02it/s]
|
1343 |
93%|ββββββββββ| 9885/10682 [1:24:01<06:34, 2.02it/s]
|
1344 |
93%|ββββββββββ| 9886/10682 [1:24:01<06:33, 2.02it/s]
|
1345 |
93%|ββββββββββ| 9887/10682 [1:24:02<06:32, 2.02it/s]
|
1346 |
93%|ββββββββββ| 9888/10682 [1:24:02<06:32, 2.02it/s]
|
1347 |
93%|ββββββββββ| 9889/10682 [1:24:03<06:32, 2.02it/s]
|
1348 |
93%|ββββββββββ| 9890/10682 [1:24:03<06:31, 2.02it/s]
|
1349 |
93%|ββββββββββ| 9891/10682 [1:24:04<06:31, 2.02it/s]
|
1350 |
93%|ββββββββββ| 9892/10682 [1:24:04<06:30, 2.02it/s]
|
1351 |
93%|ββββββββββ| 9893/10682 [1:24:05<06:29, 2.02it/s]
|
1352 |
93%|ββββββββββ| 9894/10682 [1:24:05<06:29, 2.02it/s]
|
1353 |
93%|ββββββββββ| 9895/10682 [1:24:06<06:28, 2.03it/s]
|
1354 |
93%|ββββββββββ| 9896/10682 [1:24:06<06:28, 2.02it/s]
|
1355 |
93%|ββββββββββ| 9897/10682 [1:24:07<06:27, 2.02it/s]
|
1356 |
93%|ββββββββββ| 9898/10682 [1:24:07<06:27, 2.02it/s]
|
1357 |
93%|ββββββββββ| 9899/10682 [1:24:08<06:26, 2.02it/s]
|
1358 |
93%|ββββββββββ| 9900/10682 [1:24:08<06:26, 2.02it/s]{'loss': 2.7483, 'grad_norm': 0.2770647406578064, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
1359 |
|
1360 |
+
|
1361 |
93%|ββββββββββ| 9900/10682 [1:24:08<06:26, 2.02it/s]
|
1362 |
93%|ββββββββββ| 9901/10682 [1:24:09<06:27, 2.02it/s]
|
1363 |
93%|ββββββββββ| 9902/10682 [1:24:09<06:26, 2.02it/s]
|
1364 |
93%|ββββββββββ| 9903/10682 [1:24:10<06:25, 2.02it/s]
|
1365 |
93%|ββββββββββ| 9904/10682 [1:24:10<06:25, 2.02it/s]
|
1366 |
93%|ββββββββββ| 9905/10682 [1:24:11<06:24, 2.02it/s]
|
1367 |
93%|ββββββββββ| 9906/10682 [1:24:11<06:23, 2.02it/s]
|
1368 |
93%|ββββββββββ| 9907/10682 [1:24:12<06:23, 2.02it/s]
|
1369 |
93%|ββββββββββ| 9908/10682 [1:24:12<06:23, 2.02it/s]
|
1370 |
93%|ββββββββββ| 9909/10682 [1:24:13<06:22, 2.02it/s]
|
1371 |
93%|ββββββββββ| 9910/10682 [1:24:13<06:21, 2.02it/s]
|
1372 |
93%|ββββββββββ| 9911/10682 [1:24:13<06:21, 2.02it/s]
|
1373 |
93%|ββββββββββ| 9912/10682 [1:24:14<06:20, 2.02it/s]
|
1374 |
93%|ββββββββββ| 9913/10682 [1:24:14<06:20, 2.02it/s]
|
1375 |
93%|ββββββββββ| 9914/10682 [1:24:15<06:19, 2.02it/s]
|
1376 |
93%|ββββββββββ| 9915/10682 [1:24:15<06:19, 2.02it/s]
|
1377 |
93%|ββββββββββ| 9916/10682 [1:24:16<06:18, 2.02it/s]
|
1378 |
93%|ββββββββββ| 9917/10682 [1:24:16<06:18, 2.02it/s]
|
1379 |
93%|ββββββββββ| 9918/10682 [1:24:17<06:17, 2.02it/s]
|
1380 |
93%|ββββββββββ| 9919/10682 [1:24:17<06:17, 2.02it/s]
|
1381 |
93%|ββββββββββ| 9920/10682 [1:24:18<06:16, 2.02it/s]
|
1382 |
93%|ββββββββββ| 9921/10682 [1:24:18<06:16, 2.02it/s]
|
1383 |
93%|ββββββββββ| 9922/10682 [1:24:19<06:15, 2.02it/s]
|
1384 |
93%|ββββββββββ| 9923/10682 [1:24:19<06:15, 2.02it/s]
|
1385 |
93%|ββββββββββ| 9924/10682 [1:24:20<06:15, 2.02it/s]
|
1386 |
93%|ββββββββββ| 9925/10682 [1:24:20<06:23, 1.98it/s]
|
1387 |
|
1388 |
+
|
1389 |
93%|ββββββββββ| 9925/10682 [1:24:20<06:23, 1.98it/s]
|
1390 |
93%|ββββββββββ| 9926/10682 [1:24:32<49:46, 3.95s/it]
|
1391 |
93%|ββββββββββ| 9927/10682 [1:24:33<36:40, 2.91s/it]
|
1392 |
93%|ββββββββββ| 9928/10682 [1:24:33<27:31, 2.19s/it]
|
1393 |
93%|ββββββββββ| 9929/10682 [1:24:34<21:07, 1.68s/it]
|
1394 |
93%|ββββββββββ| 9930/10682 [1:24:34<16:38, 1.33s/it]
|
1395 |
93%|ββββββββββ| 9931/10682 [1:24:35<13:29, 1.08s/it]
|
1396 |
93%|ββββββββββ| 9932/10682 [1:24:35<11:17, 1.11it/s]
|
1397 |
93%|ββββββββββ| 9933/10682 [1:24:36<09:47, 1.28it/s]
|
1398 |
93%|ββββββββββ| 9934/10682 [1:24:36<08:42, 1.43it/s]
|
1399 |
93%|ββββββββββ| 9935/10682 [1:24:37<07:55, 1.57it/s]
|
1400 |
93%|ββββββββββ| 9936/10682 [1:24:37<07:22, 1.68it/s]
|
1401 |
93%|ββββββββββ| 9937/10682 [1:24:38<06:59, 1.78it/s]
|
1402 |
93%|ββββββββββ| 9938/10682 [1:24:38<06:43, 1.84it/s]
|
1403 |
93%|ββββββββββ| 9939/10682 [1:24:39<06:32, 1.89it/s]
|
1404 |
93%|ββββββββββ| 9940/10682 [1:24:39<06:24, 1.93it/s]
|
1405 |
93%|ββββββββββ| 9941/10682 [1:24:40<06:19, 1.95it/s]
|
1406 |
93%|ββββββββββ| 9942/10682 [1:24:40<06:15, 1.97it/s]
|
1407 |
93%|ββββββββββ| 9943/10682 [1:24:41<06:12, 1.99it/s]
|
1408 |
93%|ββββββββββ| 9944/10682 [1:24:41<06:10, 1.99it/s]
|
1409 |
93%|ββββββββββ| 9945/10682 [1:24:42<06:08, 2.00it/s]
|
1410 |
93%|ββββββββββ| 9946/10682 [1:24:42<06:06, 2.01it/s]
|
1411 |
93%|ββββββββββ| 9947/10682 [1:24:43<06:05, 2.01it/s]
|
1412 |
93%|ββββββββββ| 9948/10682 [1:24:43<06:04, 2.01it/s]
|
1413 |
93%|ββββββββββ| 9949/10682 [1:24:44<06:04, 2.01it/s]
|