diff --git "a/run-2024-10-25T01:17:54+00:00.log" "b/run-2024-10-25T01:17:54+00:00.log" --- "a/run-2024-10-25T01:17:54+00:00.log" +++ "b/run-2024-10-25T01:17:54+00:00.log" @@ -2172,4 +2172,1069 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 13%|█▎ | 7740/61904 [4:03:13<8981:53:45, 596.98s/it] {'loss': 2.8846, 'learning_rate': 1.8778037080254114e-07, 'epoch': 2.0} 13%|█▎ | 7740/61904 [4:03:13<8981:53:45, 596.98s/it] 13%|█▎ | 7741/61904 [4:03:14<6294:35:44, 418.38s/it] 13%|█▎ | 7742/61904 [4:03:16<4412:32:50, 293.29s/it] 13%|█▎ | 7743/61904 [4:03:17<3095:55:39, 205.78s/it] 13%|█▎ | 7744/61904 [4:03:19<2173:25:21, 144.47s/it] 13%|█▎ | 7745/61904 [4:03:20<1527:26:31, 101.53s/it] 13%|█▎ | 7746/61904 [4:03:21<1075:47:08, 71.51s/it] 13%|█▎ | 7747/61904 [4:03:23<759:00:34, 50.45s/it] 13%|█▎ | 7748/61904 [4:03:24<537:19:22, 35.72s/it] 13%|█▎ | 7749/61904 [4:03:26<383:04:52, 25.47s/it] 13%|█▎ | 7750/61904 [4:03:27<274:40:16, 18.26s/it] 13%|█▎ | 7751/61904 [4:03:28<198:03:11, 13.17s/it] 13%|█▎ | 7752/61904 [4:03:30<144:57:18, 9.64s/it] 13%|█▎ | 7753/61904 [4:03:31<107:21:09, 7.14s/it] 13%|█▎ | 7754/61904 [4:03:33<81:43:30, 5.43s/it] 13%|█▎ | 7755/61904 [4:03:34<63:53:19, 4.25s/it] 13%|█▎ | 7756/61904 [4:03:35<50:55:00, 3.39s/it] 13%|█▎ | 7757/61904 [4:03:37<41:40:24, 2.77s/it] 13%|█▎ | 7758/61904 [4:03:38<35:20:56, 2.35s/it] 13%|█▎ | 7759/61904 [4:03:40<31:42:49, 2.11s/it] 13%|█▎ | 7760/61904 [4:03:41<28:36:23, 1.90s/it] {'loss': 2.954, 'learning_rate': 1.8774795799299882e-07, 'epoch': 2.01} 13%|█▎ | 7760/61904 [4:03:41<28:36:23, 1.90s/it] 13%|█▎ | 7761/61904 [4:03:42<25:49:01, 1.72s/it] 13%|█▎ | 7762/61904 [4:03:44<24:44:08, 1.64s/it] 13%|█▎ | 7763/61904 [4:03:45<23:22:02, 1.55s/it] 13%|█▎ | 7764/61904 [4:03:47<22:52:41, 1.52s/it] 13%|█▎ | 7765/61904 [4:03:48<21:59:31, 1.46s/it] 13%|█▎ | 7766/61904 [4:03:49<21:38:15, 1.44s/it] 13%|█▎ | 7767/61904 [4:03:51<21:01:04, 1.40s/it] 13%|█▎ | 7768/61904 [4:03:52<21:06:03, 1.40s/it] 13%|█▎ | 7769/61904 [4:03:53<20:51:05, 1.39s/it] 13%|█▎ | 7770/61904 [4:03:55<20:26:06, 1.36s/it] 13%|█▎ | 7771/61904 [4:03:56<21:34:45, 1.44s/it] 13%|█▎ | 7772/61904 [4:03:58<21:46:10, 1.45s/it] 13%|█▎ | 7773/61904 [4:03:59<21:50:34, 1.45s/it] 13%|█▎ | 7774/61904 [4:04:01<21:52:49, 1.46s/it] 13%|█▎ | 7775/61904 [4:04:02<21:18:00, 1.42s/it] 13%|█▎ | 7776/61904 [4:04:03<21:13:53, 1.41s/it] 13%|█▎ | 7777/61904 [4:04:05<20:48:41, 1.38s/it] 13%|█▎ | 7778/61904 [4:04:06<20:11:15, 1.34s/it] 13%|█▎ | 7779/61904 [4:04:08<22:02:09, 1.47s/it] 13%|█▎ | 7780/61904 [4:04:09<22:08:46, 1.47s/it] {'loss': 2.9074, 'learning_rate': 1.8771554518345649e-07, 'epoch': 2.01} - 13%|█▎ | 7780/61904 [4:04:09<22:08:46, 1.47s/it] \ No newline at end of file + 13%|█▎ | 7780/61904 [4:04:09<22:08:46, 1.47s/it] 13%|█▎ | 7781/61904 [4:04:11<22:02:07, 1.47s/it] 13%|█▎ | 7782/61904 [4:04:12<21:16:38, 1.42s/it] 13%|█▎ | 7783/61904 [4:04:13<20:51:55, 1.39s/it] 13%|█▎ | 7784/61904 [4:04:15<21:14:21, 1.41s/it] 13%|█▎ | 7785/61904 [4:04:16<20:54:57, 1.39s/it] 13%|█▎ | 7786/61904 [4:04:18<21:06:56, 1.40s/it] 13%|█▎ | 7787/61904 [4:04:19<21:30:57, 1.43s/it] 13%|█▎ | 7788/61904 [4:04:20<21:36:34, 1.44s/it] 13%|█▎ | 7789/61904 [4:04:22<21:09:52, 1.41s/it] 13%|█▎ | 7790/61904 [4:04:23<20:48:51, 1.38s/it] 13%|█▎ | 7791/61904 [4:04:25<20:38:53, 1.37s/it] 13%|█▎ | 7792/61904 [4:04:26<21:03:17, 1.40s/it] 13%|█▎ | 7793/61904 [4:04:27<20:41:08, 1.38s/it] 13%|█▎ | 7794/61904 [4:04:29<20:28:24, 1.36s/it] 13%|█▎ | 7795/61904 [4:04:30<20:29:12, 1.36s/it] 13%|█▎ | 7796/61904 [4:04:31<21:04:25, 1.40s/it] 13%|█▎ | 7797/61904 [4:04:33<21:29:19, 1.43s/it] 13%|█▎ | 7798/61904 [4:04:34<21:07:13, 1.41s/it] 13%|█▎ | 7799/61904 [4:04:36<20:50:26, 1.39s/it] 13%|█▎ | 7800/61904 [4:04:37<20:31:32, 1.37s/it] {'loss': 2.8973, 'learning_rate': 1.8768313237391415e-07, 'epoch': 2.02} + 13%|█▎ | 7800/61904 [4:04:37<20:31:32, 1.37s/it] 13%|█▎ | 7801/61904 [4:04:38<21:02:55, 1.40s/it] 13%|█▎ | 7802/61904 [4:04:40<21:09:16, 1.41s/it] 13%|█▎ | 7803/61904 [4:04:41<21:11:41, 1.41s/it] 13%|█▎ | 7804/61904 [4:04:43<21:54:44, 1.46s/it] 13%|█▎ | 7805/61904 [4:04:45<22:55:43, 1.53s/it] 13%|█▎ | 7806/61904 [4:04:46<22:01:23, 1.47s/it] 13%|█▎ | 7807/61904 [4:04:47<21:55:16, 1.46s/it] 13%|█▎ | 7808/61904 [4:04:49<21:02:42, 1.40s/it] 13%|█▎ | 7809/61904 [4:04:50<20:10:06, 1.34s/it] 13%|█▎ | 7810/61904 [4:04:51<20:58:23, 1.40s/it] 13%|█▎ | 7811/61904 [4:04:53<21:30:50, 1.43s/it] 13%|█▎ | 7812/61904 [4:04:54<21:09:00, 1.41s/it] 13%|█▎ | 7813/61904 [4:04:56<21:26:12, 1.43s/it] 13%|█▎ | 7814/61904 [4:04:57<21:19:53, 1.42s/it] 13%|█▎ | 7815/61904 [4:04:59<21:35:14, 1.44s/it] 13%|█▎ | 7816/61904 [4:05:00<21:12:04, 1.41s/it] 13%|█▎ | 7817/61904 [4:05:01<20:58:11, 1.40s/it] 13%|█▎ | 7818/61904 [4:05:03<20:52:47, 1.39s/it] 13%|█▎ | 7819/61904 [4:05:04<20:24:17, 1.36s/it] 13%|█▎ | 7820/61904 [4:05:05<20:50:50, 1.39s/it] {'loss': 2.9155, 'learning_rate': 1.8765071956437184e-07, 'epoch': 2.02} + 13%|█▎ | 7820/61904 [4:05:05<20:50:50, 1.39s/it] 13%|█▎ | 7821/61904 [4:05:07<20:23:42, 1.36s/it] 13%|█▎ | 7822/61904 [4:05:08<20:21:56, 1.36s/it] 13%|█▎ | 7823/61904 [4:05:09<20:27:54, 1.36s/it] 13%|█▎ | 7824/61904 [4:05:11<20:45:23, 1.38s/it] 13%|█▎ | 7825/61904 [4:05:12<21:18:26, 1.42s/it] 13%|█▎ | 7826/61904 [4:05:14<21:53:08, 1.46s/it] 13%|█▎ | 7827/61904 [4:05:15<21:16:27, 1.42s/it] 13%|█▎ | 7828/61904 [4:05:17<20:55:04, 1.39s/it] 13%|█▎ | 7829/61904 [4:05:18<21:46:11, 1.45s/it] 13%|█▎ | 7830/61904 [4:05:19<21:03:11, 1.40s/it] 13%|█▎ | 7831/61904 [4:05:21<20:34:28, 1.37s/it] 13%|█▎ | 7832/61904 [4:05:22<21:12:22, 1.41s/it] 13%|█▎ | 7833/61904 [4:05:24<21:12:29, 1.41s/it] 13%|█▎ | 7834/61904 [4:05:25<21:32:42, 1.43s/it] 13%|█▎ | 7835/61904 [4:05:27<21:32:46, 1.43s/it] 13%|█▎ | 7836/61904 [4:05:28<20:53:43, 1.39s/it] 13%|█▎ | 7837/61904 [4:05:29<20:44:55, 1.38s/it] 13%|█▎ | 7838/61904 [4:05:31<21:45:47, 1.45s/it] 13%|█▎ | 7839/61904 [4:05:32<21:38:20, 1.44s/it] 13%|█▎ | 7840/61904 [4:05:34<21:27:08, 1.43s/it] {'loss': 2.9162, 'learning_rate': 1.876183067548295e-07, 'epoch': 2.03} + 13%|█▎ | 7840/61904 [4:05:34<21:27:08, 1.43s/it] 13%|█▎ | 7841/61904 [4:05:35<21:27:13, 1.43s/it] 13%|█▎ | 7842/61904 [4:05:37<22:10:25, 1.48s/it] 13%|█▎ | 7843/61904 [4:05:38<21:17:32, 1.42s/it] 13%|█▎ | 7844/61904 [4:05:39<21:24:21, 1.43s/it] 13%|█▎ | 7845/61904 [4:05:41<21:10:59, 1.41s/it] 13%|█▎ | 7846/61904 [4:05:42<20:53:02, 1.39s/it] 13%|█▎ | 7847/61904 [4:05:43<20:22:38, 1.36s/it] 13%|█▎ | 7848/61904 [4:05:45<20:12:45, 1.35s/it] 13%|█▎ | 7849/61904 [4:05:46<20:48:03, 1.39s/it] 13%|█▎ | 7850/61904 [4:05:48<21:06:05, 1.41s/it] 13%|█▎ | 7851/61904 [4:05:49<20:36:00, 1.37s/it] 13%|█▎ | 7852/61904 [4:05:50<20:16:33, 1.35s/it] 13%|█▎ | 7853/61904 [4:05:52<20:52:55, 1.39s/it] 13%|█▎ | 7854/61904 [4:05:53<20:40:52, 1.38s/it] 13%|█▎ | 7855/61904 [4:05:54<20:03:05, 1.34s/it] 13%|█▎ | 7856/61904 [4:05:56<20:12:48, 1.35s/it] 13%|█▎ | 7857/61904 [4:05:57<20:06:10, 1.34s/it] 13%|█▎ | 7858/61904 [4:05:58<20:43:42, 1.38s/it] 13%|█▎ | 7859/61904 [4:06:00<21:16:57, 1.42s/it] 13%|█▎ | 7860/61904 [4:06:01<21:18:18, 1.42s/it] {'loss': 2.8391, 'learning_rate': 1.8758589394528716e-07, 'epoch': 2.03} + 13%|█▎ | 7860/61904 [4:06:01<21:18:18, 1.42s/it] 13%|█▎ | 7861/61904 [4:06:03<20:45:59, 1.38s/it] 13%|█▎ | 7862/61904 [4:06:04<20:14:43, 1.35s/it] 13%|█▎ | 7863/61904 [4:06:05<20:28:34, 1.36s/it] 13%|█▎ | 7864/61904 [4:06:07<21:10:46, 1.41s/it] 13%|█▎ | 7865/61904 [4:06:08<21:15:49, 1.42s/it] 13%|█▎ | 7866/61904 [4:06:10<21:27:55, 1.43s/it] 13%|█▎ | 7867/61904 [4:06:11<21:17:55, 1.42s/it] 13%|█▎ | 7868/61904 [4:06:12<20:37:48, 1.37s/it] 13%|█▎ | 7869/61904 [4:06:14<20:39:05, 1.38s/it] 13%|█▎ | 7870/61904 [4:06:15<20:43:28, 1.38s/it] 13%|█▎ | 7871/61904 [4:06:17<20:37:22, 1.37s/it] 13%|█▎ | 7872/61904 [4:06:18<20:08:39, 1.34s/it] 13%|█▎ | 7873/61904 [4:06:19<20:17:15, 1.35s/it] 13%|█▎ | 7874/61904 [4:06:20<19:53:25, 1.33s/it] 13%|█▎ | 7875/61904 [4:06:22<20:21:07, 1.36s/it] 13%|█▎ | 7876/61904 [4:06:23<21:37:25, 1.44s/it] 13%|█▎ | 7877/61904 [4:06:25<20:54:56, 1.39s/it] 13%|█▎ | 7878/61904 [4:06:26<20:50:02, 1.39s/it] 13%|█▎ | 7879/61904 [4:06:27<20:24:16, 1.36s/it] 13%|█▎ | 7880/61904 [4:06:29<19:44:04, 1.32s/it] {'loss': 2.8963, 'learning_rate': 1.8755348113574485e-07, 'epoch': 2.04} + 13%|█▎ | 7880/61904 [4:06:29<19:44:04, 1.32s/it] 13%|█▎ | 7881/61904 [4:06:30<19:28:35, 1.30s/it] 13%|█▎ | 7882/61904 [4:06:31<20:07:01, 1.34s/it] 13%|█▎ | 7883/61904 [4:06:33<20:32:59, 1.37s/it] 13%|█▎ | 7884/61904 [4:06:34<20:11:44, 1.35s/it] 13%|█▎ | 7885/61904 [4:06:35<20:15:18, 1.35s/it] 13%|█▎ | 7886/61904 [4:06:37<20:38:29, 1.38s/it] 13%|█▎ | 7887/61904 [4:06:38<20:33:41, 1.37s/it] 13%|█▎ | 7888/61904 [4:06:40<20:24:55, 1.36s/it] 13%|█▎ | 7889/61904 [4:06:41<20:33:59, 1.37s/it] 13%|█▎ | 7890/61904 [4:06:42<20:21:07, 1.36s/it] 13%|█▎ | 7891/61904 [4:06:44<20:56:04, 1.40s/it] 13%|█▎ | 7892/61904 [4:06:45<20:59:05, 1.40s/it] 13%|█▎ | 7893/61904 [4:06:47<20:56:36, 1.40s/it] 13%|█▎ | 7894/61904 [4:06:48<20:48:09, 1.39s/it] 13%|█▎ | 7895/61904 [4:06:49<20:50:03, 1.39s/it] 13%|█▎ | 7896/61904 [4:06:51<21:00:35, 1.40s/it] 13%|█▎ | 7897/61904 [4:06:52<20:33:56, 1.37s/it] 13%|█▎ | 7898/61904 [4:06:53<20:16:11, 1.35s/it] 13%|█▎ | 7899/61904 [4:06:55<19:51:49, 1.32s/it] 13%|█▎ | 7900/61904 [4:06:56<20:19:35, 1.35s/it] {'loss': 2.9148, 'learning_rate': 1.8752106832620248e-07, 'epoch': 2.04} + 13%|█▎ | 7900/61904 [4:06:56<20:19:35, 1.35s/it] 13%|█▎ | 7901/61904 [4:06:57<20:05:23, 1.34s/it] 13%|█▎ | 7902/61904 [4:06:59<20:10:33, 1.35s/it] 13%|█▎ | 7903/61904 [4:07:00<21:00:01, 1.40s/it] 13%|█▎ | 7904/61904 [4:07:02<21:15:42, 1.42s/it] 13%|█▎ | 7905/61904 [4:07:03<20:57:20, 1.40s/it] 13%|█▎ | 7906/61904 [4:07:04<20:12:59, 1.35s/it] 13%|█▎ | 7907/61904 [4:07:06<19:57:22, 1.33s/it] 13%|█▎ | 7908/61904 [4:07:07<20:11:38, 1.35s/it] 13%|█▎ | 7909/61904 [4:07:08<20:05:54, 1.34s/it] 13%|█▎ | 7910/61904 [4:07:10<20:08:54, 1.34s/it] 13%|█▎ | 7911/61904 [4:07:11<20:12:11, 1.35s/it] 13%|█▎ | 7912/61904 [4:07:12<20:12:21, 1.35s/it] 13%|█▎ | 7913/61904 [4:07:14<21:18:02, 1.42s/it] 13%|█▎ | 7914/61904 [4:07:15<20:36:30, 1.37s/it] 13%|█▎ | 7915/61904 [4:07:17<20:40:32, 1.38s/it] 13%|█▎ | 7916/61904 [4:07:18<20:11:44, 1.35s/it] 13%|█▎ | 7917/61904 [4:07:19<20:12:51, 1.35s/it] 13%|█▎ | 7918/61904 [4:07:21<20:50:39, 1.39s/it] 13%|█▎ | 7919/61904 [4:07:22<20:52:51, 1.39s/it] 13%|█▎ | 7920/61904 [4:07:23<20:04:20, 1.34s/it] {'loss': 2.9378, 'learning_rate': 1.8748865551666017e-07, 'epoch': 2.05} + 13%|█▎ | 7920/61904 [4:07:23<20:04:20, 1.34s/it] 13%|█▎ | 7921/61904 [4:07:25<19:30:43, 1.30s/it] 13%|█▎ | 7922/61904 [4:07:26<19:45:36, 1.32s/it] 13%|█▎ | 7923/61904 [4:07:27<19:35:56, 1.31s/it] 13%|█▎ | 7924/61904 [4:07:29<20:02:11, 1.34s/it] 13%|█▎ | 7925/61904 [4:07:30<19:47:17, 1.32s/it] 13%|█▎ | 7926/61904 [4:07:31<19:55:00, 1.33s/it] 13%|█▎ | 7927/61904 [4:07:32<19:39:03, 1.31s/it] 13%|█▎ | 7928/61904 [4:07:34<19:33:13, 1.30s/it] 13%|█▎ | 7929/61904 [4:07:35<19:17:52, 1.29s/it] 13%|█▎ | 7930/61904 [4:07:36<19:33:46, 1.30s/it] 13%|█▎ | 7931/61904 [4:07:38<19:44:18, 1.32s/it] 13%|█▎ | 7932/61904 [4:07:39<20:03:45, 1.34s/it] 13%|█▎ | 7933/61904 [4:07:41<20:35:32, 1.37s/it] 13%|█▎ | 7934/61904 [4:07:42<20:19:33, 1.36s/it] 13%|█▎ | 7935/61904 [4:07:43<20:17:47, 1.35s/it] 13%|█▎ | 7936/61904 [4:07:45<20:38:33, 1.38s/it] 13%|█▎ | 7937/61904 [4:07:46<20:06:26, 1.34s/it] 13%|█▎ | 7938/61904 [4:07:47<20:30:48, 1.37s/it] 13%|█▎ | 7939/61904 [4:07:49<20:22:07, 1.36s/it] 13%|█▎ | 7940/61904 [4:07:50<20:32:37, 1.37s/it] {'loss': 2.8385, 'learning_rate': 1.8745624270711783e-07, 'epoch': 2.05} + 13%|█▎ | 7940/61904 [4:07:50<20:32:37, 1.37s/it] 13%|█▎ | 7941/61904 [4:07:51<20:26:46, 1.36s/it] 13%|█▎ | 7942/61904 [4:07:53<20:34:08, 1.37s/it] 13%|█▎ | 7943/61904 [4:07:54<20:58:42, 1.40s/it] 13%|█▎ | 7944/61904 [4:07:56<20:30:19, 1.37s/it] 13%|█▎ | 7945/61904 [4:07:57<20:01:38, 1.34s/it] 13%|█▎ | 7946/61904 [4:07:58<19:53:06, 1.33s/it] 13%|█▎ | 7947/61904 [4:08:00<20:39:52, 1.38s/it] 13%|█▎ | 7948/61904 [4:08:01<20:32:44, 1.37s/it] 13%|█▎ | 7949/61904 [4:08:02<20:09:56, 1.35s/it] 13%|█▎ | 7950/61904 [4:08:04<20:18:05, 1.35s/it] 13%|█▎ | 7951/61904 [4:08:05<20:21:59, 1.36s/it] 13%|█▎ | 7952/61904 [4:08:06<20:21:25, 1.36s/it] 13%|█▎ | 7953/61904 [4:08:08<20:17:26, 1.35s/it] 13%|█▎ | 7954/61904 [4:08:09<20:13:01, 1.35s/it] 13%|█▎ | 7955/61904 [4:08:10<20:17:14, 1.35s/it] 13%|█▎ | 7956/61904 [4:08:12<20:03:58, 1.34s/it] 13%|█▎ | 7957/61904 [4:08:13<20:22:25, 1.36s/it] 13%|█▎ | 7958/61904 [4:08:14<20:11:33, 1.35s/it] 13%|█▎ | 7959/61904 [4:08:16<20:27:08, 1.36s/it] 13%|█▎ | 7960/61904 [4:08:17<20:22:01, 1.36s/it] {'loss': 2.9649, 'learning_rate': 1.874238298975755e-07, 'epoch': 2.06} + 13%|█▎ | 7960/61904 [4:08:17<20:22:01, 1.36s/it] 13%|█▎ | 7961/61904 [4:08:19<20:15:19, 1.35s/it] 13%|█▎ | 7962/61904 [4:08:20<20:42:26, 1.38s/it] 13%|█▎ | 7963/61904 [4:08:21<20:15:38, 1.35s/it] 13%|█▎ | 7964/61904 [4:08:23<20:04:09, 1.34s/it] 13%|█▎ | 7965/61904 [4:08:24<19:59:20, 1.33s/it] 13%|█▎ | 7966/61904 [4:08:25<21:10:04, 1.41s/it] 13%|█▎ | 7967/61904 [4:08:27<21:17:34, 1.42s/it] 13%|█▎ | 7968/61904 [4:08:28<20:41:52, 1.38s/it] 13%|█▎ | 7969/61904 [4:08:30<20:30:33, 1.37s/it] 13%|█▎ | 7970/61904 [4:08:31<20:21:11, 1.36s/it] 13%|█▎ | 7971/61904 [4:08:32<20:30:30, 1.37s/it] 13%|█▎ | 7972/61904 [4:08:34<20:43:49, 1.38s/it] 13%|█▎ | 7973/61904 [4:08:35<20:26:05, 1.36s/it] 13%|█▎ | 7974/61904 [4:08:36<19:52:14, 1.33s/it] 13%|█▎ | 7975/61904 [4:08:38<21:26:45, 1.43s/it] 13%|█▎ | 7976/61904 [4:08:39<21:03:13, 1.41s/it] 13%|█▎ | 7977/61904 [4:08:41<20:31:19, 1.37s/it] 13%|█▎ | 7978/61904 [4:08:42<20:33:34, 1.37s/it] 13%|█▎ | 7979/61904 [4:08:43<20:05:01, 1.34s/it] 13%|█▎ | 7980/61904 [4:08:44<19:36:53, 1.31s/it] {'loss': 2.8578, 'learning_rate': 1.8739141708803318e-07, 'epoch': 2.06} + 13%|█▎ | 7980/61904 [4:08:44<19:36:53, 1.31s/it] 13%|█▎ | 7981/61904 [4:08:46<19:52:02, 1.33s/it] 13%|█▎ | 7982/61904 [4:08:47<20:07:19, 1.34s/it] 13%|█▎ | 7983/61904 [4:08:49<21:30:44, 1.44s/it] 13%|█▎ | 7984/61904 [4:08:50<21:28:38, 1.43s/it] 13%|█▎ | 7985/61904 [4:08:52<20:51:38, 1.39s/it] 13%|█▎ | 7986/61904 [4:08:53<21:06:39, 1.41s/it] 13%|█▎ | 7987/61904 [4:08:54<21:17:04, 1.42s/it] 13%|█▎ | 7988/61904 [4:08:56<20:34:08, 1.37s/it] 13%|█▎ | 7989/61904 [4:08:57<19:59:32, 1.33s/it] 13%|█▎ | 7990/61904 [4:08:58<20:09:43, 1.35s/it] 13%|█▎ | 7991/61904 [4:09:00<20:00:21, 1.34s/it] 13%|█▎ | 7992/61904 [4:09:01<20:31:22, 1.37s/it] 13%|█▎ | 7993/61904 [4:09:02<19:56:28, 1.33s/it] 13%|█▎ | 7994/61904 [4:09:04<21:03:07, 1.41s/it] 13%|█▎ | 7995/61904 [4:09:05<20:29:43, 1.37s/it] 13%|█▎ | 7996/61904 [4:09:07<20:32:15, 1.37s/it] 13%|█▎ | 7997/61904 [4:09:08<20:05:58, 1.34s/it] 13%|█▎ | 7998/61904 [4:09:09<20:00:38, 1.34s/it] 13%|█▎ | 7999/61904 [4:09:11<20:35:23, 1.38s/it] 13%|█▎ | 8000/61904 [4:09:12<20:48:28, 1.39s/it] {'loss': 2.8477, 'learning_rate': 1.8735900427849085e-07, 'epoch': 2.07} + 13%|█▎ | 8000/61904 [4:09:12<20:48:28, 1.39s/it] 13%|█▎ | 8001/61904 [4:09:14<21:16:00, 1.42s/it] 13%|█▎ | 8002/61904 [4:09:15<20:46:05, 1.39s/it] 13%|█▎ | 8003/61904 [4:09:16<20:34:22, 1.37s/it] 13%|█▎ | 8004/61904 [4:09:17<19:44:50, 1.32s/it] 13%|█▎ | 8005/61904 [4:09:19<20:39:29, 1.38s/it] 13%|█▎ | 8006/61904 [4:09:20<20:28:38, 1.37s/it] 13%|█▎ | 8007/61904 [4:09:22<20:11:15, 1.35s/it] 13%|█▎ | 8008/61904 [4:09:23<20:30:35, 1.37s/it] 13%|█▎ | 8009/61904 [4:09:24<20:26:09, 1.37s/it] 13%|█▎ | 8010/61904 [4:09:26<20:21:37, 1.36s/it] 13%|█▎ | 8011/61904 [4:09:27<20:38:00, 1.38s/it] 13%|█▎ | 8012/61904 [4:09:28<19:54:07, 1.33s/it] 13%|█▎ | 8013/61904 [4:09:30<20:20:11, 1.36s/it] 13%|█▎ | 8014/61904 [4:09:31<21:01:53, 1.40s/it] 13%|█▎ | 8015/61904 [4:09:33<20:55:21, 1.40s/it] 13%|█▎ | 8016/61904 [4:09:34<21:24:43, 1.43s/it] 13%|█▎ | 8017/61904 [4:09:35<20:31:32, 1.37s/it] 13%|█▎ | 8018/61904 [4:09:37<20:04:38, 1.34s/it] 13%|█▎ | 8019/61904 [4:09:38<19:55:51, 1.33s/it] 13%|█▎ | 8020/61904 [4:09:39<19:36:00, 1.31s/it] {'loss': 2.8336, 'learning_rate': 1.873265914689485e-07, 'epoch': 2.07} + 13%|█▎ | 8020/61904 [4:09:39<19:36:00, 1.31s/it] 13%|█▎ | 8021/61904 [4:09:41<20:44:32, 1.39s/it] 13%|█▎ | 8022/61904 [4:09:42<19:59:11, 1.34s/it] 13%|█▎ | 8023/61904 [4:09:43<20:33:59, 1.37s/it] 13%|█▎ | 8024/61904 [4:09:45<20:45:40, 1.39s/it] 13%|█▎ | 8025/61904 [4:09:46<20:27:31, 1.37s/it] 13%|█▎ | 8026/61904 [4:09:48<20:10:19, 1.35s/it] 13%|█▎ | 8027/61904 [4:09:49<21:15:43, 1.42s/it] 13%|█▎ | 8028/61904 [4:09:50<20:57:55, 1.40s/it] 13%|█▎ | 8029/61904 [4:09:52<20:24:52, 1.36s/it] 13%|█▎ | 8030/61904 [4:09:53<20:33:53, 1.37s/it] 13%|█▎ | 8031/61904 [4:09:54<19:53:12, 1.33s/it] 13%|█▎ | 8032/61904 [4:09:56<20:22:04, 1.36s/it] 13%|█▎ | 8033/61904 [4:09:57<19:54:21, 1.33s/it] 13%|█▎ | 8034/61904 [4:09:59<20:35:16, 1.38s/it] 13%|█▎ | 8035/61904 [4:10:00<20:41:00, 1.38s/it] 13%|█▎ | 8036/61904 [4:10:01<20:07:58, 1.35s/it] 13%|█▎ | 8037/61904 [4:10:02<19:32:44, 1.31s/it] 13%|█▎ | 8038/61904 [4:10:04<20:12:57, 1.35s/it] 13%|█▎ | 8039/61904 [4:10:05<19:48:30, 1.32s/it] 13%|█▎ | 8040/61904 [4:10:07<20:33:55, 1.37s/it] {'loss': 2.9005, 'learning_rate': 1.872941786594062e-07, 'epoch': 2.08} + 13%|█▎ | 8040/61904 [4:10:07<20:33:55, 1.37s/it] 13%|█▎ | 8041/61904 [4:10:08<20:12:43, 1.35s/it] 13%|█▎ | 8042/61904 [4:10:09<20:18:35, 1.36s/it] 13%|█▎ | 8043/61904 [4:10:11<19:54:21, 1.33s/it] 13%|█▎ | 8044/61904 [4:10:12<20:07:42, 1.35s/it] 13%|█▎ | 8045/61904 [4:10:13<20:15:46, 1.35s/it] 13%|█▎ | 8046/61904 [4:10:15<20:53:10, 1.40s/it] 13%|█▎ | 8047/61904 [4:10:16<20:50:33, 1.39s/it] 13%|█▎ | 8048/61904 [4:10:18<20:34:52, 1.38s/it] 13%|█▎ | 8049/61904 [4:10:19<21:00:46, 1.40s/it] 13%|█▎ | 8050/61904 [4:10:20<21:04:40, 1.41s/it] 13%|█▎ | 8051/61904 [4:10:22<21:46:49, 1.46s/it] 13%|█▎ | 8052/61904 [4:10:23<21:53:35, 1.46s/it] 13%|█▎ | 8053/61904 [4:10:25<21:30:42, 1.44s/it] 13%|█▎ | 8054/61904 [4:10:26<22:26:25, 1.50s/it] 13%|█▎ | 8055/61904 [4:10:28<22:23:14, 1.50s/it] 13%|█▎ | 8056/61904 [4:10:29<21:33:36, 1.44s/it] 13%|█▎ | 8057/61904 [4:10:31<21:43:16, 1.45s/it] 13%|█▎ | 8058/61904 [4:10:32<20:40:15, 1.38s/it] 13%|█▎ | 8059/61904 [4:10:33<20:08:43, 1.35s/it] 13%|█▎ | 8060/61904 [4:10:35<20:27:47, 1.37s/it] {'loss': 2.9426, 'learning_rate': 1.8726176584986386e-07, 'epoch': 2.08} + 13%|█▎ | 8060/61904 [4:10:35<20:27:47, 1.37s/it] 13%|█▎ | 8061/61904 [4:10:36<20:19:36, 1.36s/it] 13%|█▎ | 8062/61904 [4:10:37<20:13:28, 1.35s/it] 13%|█▎ | 8063/61904 [4:10:39<20:15:23, 1.35s/it] 13%|█▎ | 8064/61904 [4:10:40<20:06:22, 1.34s/it] 13%|█▎ | 8065/61904 [4:10:41<19:51:14, 1.33s/it] 13%|█▎ | 8066/61904 [4:10:43<19:56:58, 1.33s/it] 13%|█▎ | 8067/61904 [4:10:44<20:06:41, 1.34s/it] 13%|█▎ | 8068/61904 [4:10:45<19:47:27, 1.32s/it] 13%|█▎ | 8069/61904 [4:10:47<20:20:08, 1.36s/it] 13%|█▎ | 8070/61904 [4:10:48<21:19:54, 1.43s/it] 13%|█▎ | 8071/61904 [4:10:50<21:18:39, 1.43s/it] 13%|█▎ | 8072/61904 [4:10:51<21:15:48, 1.42s/it] 13%|█▎ | 8073/61904 [4:10:53<21:30:43, 1.44s/it] 13%|█▎ | 8074/61904 [4:10:54<20:59:23, 1.40s/it] 13%|█▎ | 8075/61904 [4:10:56<21:46:53, 1.46s/it] 13%|█▎ | 8076/61904 [4:10:57<21:01:25, 1.41s/it] 13%|█▎ | 8077/61904 [4:10:58<21:23:49, 1.43s/it] 13%|█▎ | 8078/61904 [4:11:00<20:49:56, 1.39s/it] 13%|█▎ | 8079/61904 [4:11:01<20:08:54, 1.35s/it] 13%|█▎ | 8080/61904 [4:11:02<19:45:58, 1.32s/it] {'loss': 2.8823, 'learning_rate': 1.8722935304032152e-07, 'epoch': 2.09} + 13%|█▎ | 8080/61904 [4:11:02<19:45:58, 1.32s/it] 13%|█▎ | 8081/61904 [4:11:04<19:57:16, 1.33s/it] 13%|█▎ | 8082/61904 [4:11:05<20:39:25, 1.38s/it] 13%|█▎ | 8083/61904 [4:11:06<20:47:22, 1.39s/it] 13%|█▎ | 8084/61904 [4:11:08<21:13:04, 1.42s/it] 13%|█▎ | 8085/61904 [4:11:09<20:21:29, 1.36s/it] 13%|█▎ | 8086/61904 [4:11:10<20:26:13, 1.37s/it] 13%|█▎ | 8087/61904 [4:11:12<20:51:21, 1.40s/it] 13%|█▎ | 8088/61904 [4:11:13<20:05:25, 1.34s/it] 13%|█▎ | 8089/61904 [4:11:15<20:37:04, 1.38s/it] 13%|█▎ | 8090/61904 [4:11:16<20:25:29, 1.37s/it] 13%|█▎ | 8091/61904 [4:11:17<20:10:49, 1.35s/it] 13%|█▎ | 8092/61904 [4:11:19<19:58:30, 1.34s/it] 13%|█▎ | 8093/61904 [4:11:20<20:23:28, 1.36s/it] 13%|█▎ | 8094/61904 [4:11:21<20:24:13, 1.37s/it] 13%|█▎ | 8095/61904 [4:11:23<20:06:59, 1.35s/it] 13%|█▎ | 8096/61904 [4:11:24<20:29:06, 1.37s/it] 13%|█▎ | 8097/61904 [4:11:26<20:32:42, 1.37s/it] 13%|█▎ | 8098/61904 [4:11:27<20:33:28, 1.38s/it] 13%|█▎ | 8099/61904 [4:11:28<19:54:47, 1.33s/it] 13%|█▎ | 8100/61904 [4:11:29<19:36:46, 1.31s/it] {'loss': 2.8766, 'learning_rate': 1.871969402307792e-07, 'epoch': 2.09} + 13%|█▎ | 8100/61904 [4:11:29<19:36:46, 1.31s/it] 13%|█▎ | 8101/61904 [4:11:31<19:32:59, 1.31s/it] 13%|█▎ | 8102/61904 [4:11:32<19:41:34, 1.32s/it] 13%|█▎ | 8103/61904 [4:11:33<19:45:47, 1.32s/it] 13%|█▎ | 8104/61904 [4:11:35<19:47:11, 1.32s/it] 13%|█▎ | 8105/61904 [4:11:36<20:14:22, 1.35s/it] 13%|█▎ | 8106/61904 [4:11:37<20:12:25, 1.35s/it] 13%|█▎ | 8107/61904 [4:11:39<20:03:53, 1.34s/it] 13%|█▎ | 8108/61904 [4:11:40<19:52:03, 1.33s/it] 13%|█▎ | 8109/61904 [4:11:41<19:52:35, 1.33s/it] 13%|█▎ | 8110/61904 [4:11:43<19:23:12, 1.30s/it] 13%|█▎ | 8111/61904 [4:11:44<19:57:02, 1.34s/it] 13%|█▎ | 8112/61904 [4:11:45<19:53:28, 1.33s/it] 13%|█▎ | 8113/61904 [4:11:47<20:27:34, 1.37s/it] 13%|█▎ | 8114/61904 [4:11:48<20:27:30, 1.37s/it] 13%|█▎ | 8115/61904 [4:11:50<20:56:59, 1.40s/it] 13%|█▎ | 8116/61904 [4:11:51<20:20:31, 1.36s/it] 13%|█▎ | 8117/61904 [4:11:52<20:04:33, 1.34s/it] 13%|█▎ | 8118/61904 [4:11:54<19:47:24, 1.32s/it] 13%|█▎ | 8119/61904 [4:11:55<19:59:07, 1.34s/it] 13%|█▎ | 8120/61904 [4:11:56<20:23:02, 1.36s/it] {'loss': 2.8903, 'learning_rate': 1.8716452742123684e-07, 'epoch': 2.1} + 13%|█▎ | 8120/61904 [4:11:56<20:23:02, 1.36s/it] 13%|█▎ | 8121/61904 [4:11:58<20:11:10, 1.35s/it] 13%|█▎ | 8122/61904 [4:11:59<19:54:03, 1.33s/it] 13%|█▎ | 8123/61904 [4:12:00<19:57:58, 1.34s/it] 13%|█▎ | 8124/61904 [4:12:02<19:58:36, 1.34s/it] 13%|█▎ | 8125/61904 [4:12:03<20:01:23, 1.34s/it] 13%|█▎ | 8126/61904 [4:12:04<20:19:55, 1.36s/it] 13%|█▎ | 8127/61904 [4:12:06<19:50:26, 1.33s/it] 13%|█▎ | 8128/61904 [4:12:07<19:46:14, 1.32s/it] 13%|█▎ | 8129/61904 [4:12:08<19:38:42, 1.32s/it] 13%|█▎ | 8130/61904 [4:12:10<19:31:51, 1.31s/it] 13%|█▎ | 8131/61904 [4:12:11<19:47:12, 1.32s/it] 13%|█▎ | 8132/61904 [4:12:12<19:25:37, 1.30s/it] 13%|█▎ | 8133/61904 [4:12:13<19:41:08, 1.32s/it] 13%|█▎ | 8134/61904 [4:12:15<20:03:22, 1.34s/it] 13%|█▎ | 8135/61904 [4:12:16<19:41:20, 1.32s/it] 13%|█▎ | 8136/61904 [4:12:18<19:56:50, 1.34s/it] 13%|█▎ | 8137/61904 [4:12:19<20:08:53, 1.35s/it] 13%|█▎ | 8138/61904 [4:12:20<20:16:19, 1.36s/it] 13%|█▎ | 8139/61904 [4:12:22<21:12:21, 1.42s/it] 13%|█▎ | 8140/61904 [4:12:23<21:31:27, 1.44s/it] {'loss': 2.9383, 'learning_rate': 1.8713211461169453e-07, 'epoch': 2.1} + 13%|█▎ | 8140/61904 [4:12:23<21:31:27, 1.44s/it] 13%|█▎ | 8141/61904 [4:12:25<21:21:13, 1.43s/it] 13%|█▎ | 8142/61904 [4:12:26<20:52:12, 1.40s/it] 13%|█▎ | 8143/61904 [4:12:27<20:28:43, 1.37s/it] 13%|█▎ | 8144/61904 [4:12:29<21:11:22, 1.42s/it] 13%|█▎ | 8145/61904 [4:12:30<21:05:23, 1.41s/it] 13%|█▎ | 8146/61904 [4:12:32<20:50:35, 1.40s/it] 13%|█▎ | 8147/61904 [4:12:33<20:29:28, 1.37s/it] 13%|█▎ | 8148/61904 [4:12:34<20:09:09, 1.35s/it] 13%|█▎ | 8149/61904 [4:12:36<20:43:03, 1.39s/it] 13%|█▎ | 8150/61904 [4:12:37<20:58:31, 1.40s/it] 13%|█▎ | 8151/61904 [4:12:38<20:15:55, 1.36s/it] 13%|█▎ | 8152/61904 [4:12:40<20:37:08, 1.38s/it] 13%|█▎ | 8153/61904 [4:12:41<20:42:37, 1.39s/it] 13%|█▎ | 8154/61904 [4:12:43<21:25:55, 1.44s/it] 13%|█▎ | 8155/61904 [4:12:44<21:41:59, 1.45s/it] 13%|█▎ | 8156/61904 [4:12:46<22:02:21, 1.48s/it] 13%|█▎ | 8157/61904 [4:12:47<21:16:47, 1.43s/it] 13%|█▎ | 8158/61904 [4:12:49<21:00:36, 1.41s/it] 13%|█▎ | 8159/61904 [4:12:50<20:35:53, 1.38s/it] 13%|█▎ | 8160/61904 [4:12:51<19:56:01, 1.34s/it] {'loss': 2.8702, 'learning_rate': 1.870997018021522e-07, 'epoch': 2.11} + 13%|█▎ | 8160/61904 [4:12:51<19:56:01, 1.34s/it] 13%|█▎ | 8161/61904 [4:12:52<19:37:50, 1.31s/it] 13%|█▎ | 8162/61904 [4:12:54<19:42:25, 1.32s/it] 13%|█▎ | 8163/61904 [4:12:55<20:25:54, 1.37s/it] 13%|█▎ | 8164/61904 [4:12:57<20:52:36, 1.40s/it] 13%|█▎ | 8165/61904 [4:12:58<20:47:20, 1.39s/it] 13%|█▎ | 8166/61904 [4:12:59<20:29:38, 1.37s/it] 13%|█▎ | 8167/61904 [4:13:01<19:39:35, 1.32s/it] 13%|█▎ | 8168/61904 [4:13:02<19:16:57, 1.29s/it] 13%|█▎ | 8169/61904 [4:13:03<19:23:50, 1.30s/it] 13%|█▎ | 8170/61904 [4:13:04<19:14:55, 1.29s/it] 13%|█▎ | 8171/61904 [4:13:06<19:19:52, 1.30s/it] 13%|█▎ | 8172/61904 [4:13:07<19:30:15, 1.31s/it] 13%|█▎ | 8173/61904 [4:13:08<20:01:00, 1.34s/it] 13%|█▎ | 8174/61904 [4:13:10<20:02:18, 1.34s/it] 13%|█▎ | 8175/61904 [4:13:11<19:55:58, 1.34s/it] 13%|█▎ | 8176/61904 [4:13:12<19:36:19, 1.31s/it] 13%|█▎ | 8177/61904 [4:13:14<19:42:44, 1.32s/it] 13%|█▎ | 8178/61904 [4:13:15<20:06:40, 1.35s/it] 13%|█▎ | 8179/61904 [4:13:17<21:00:53, 1.41s/it] 13%|█▎ | 8180/61904 [4:13:18<20:04:43, 1.35s/it] {'loss': 2.8984, 'learning_rate': 1.8706728899260986e-07, 'epoch': 2.11} + 13%|█▎ | 8180/61904 [4:13:18<20:04:43, 1.35s/it] 13%|█▎ | 8181/61904 [4:13:19<19:54:14, 1.33s/it] 13%|█▎ | 8182/61904 [4:13:21<20:24:21, 1.37s/it] 13%|█▎ | 8183/61904 [4:13:22<20:17:02, 1.36s/it] 13%|█▎ | 8184/61904 [4:13:23<19:48:28, 1.33s/it] 13%|█▎ | 8185/61904 [4:13:25<20:32:07, 1.38s/it] 13%|█▎ | 8186/61904 [4:13:26<20:37:46, 1.38s/it] 13%|█▎ | 8187/61904 [4:13:27<20:05:20, 1.35s/it] 13%|█▎ | 8188/61904 [4:13:29<19:52:59, 1.33s/it] 13%|█▎ | 8189/61904 [4:13:30<20:02:24, 1.34s/it] 13%|█▎ | 8190/61904 [4:13:32<20:59:06, 1.41s/it] 13%|█▎ | 8191/61904 [4:13:33<21:21:00, 1.43s/it] 13%|█▎ | 8192/61904 [4:13:34<20:50:17, 1.40s/it] 13%|█▎ | 8193/61904 [4:13:36<20:30:49, 1.37s/it] 13%|█▎ | 8194/61904 [4:13:37<20:04:25, 1.35s/it] 13%|█▎ | 8195/61904 [4:13:38<19:44:01, 1.32s/it] 13%|█▎ | 8196/61904 [4:13:40<20:04:31, 1.35s/it] 13%|█▎ | 8197/61904 [4:13:41<20:07:46, 1.35s/it] 13%|█▎ | 8198/61904 [4:13:42<20:11:35, 1.35s/it] 13%|█▎ | 8199/61904 [4:13:44<20:06:27, 1.35s/it] 13%|█▎ | 8200/61904 [4:13:45<20:31:45, 1.38s/it] {'loss': 2.934, 'learning_rate': 1.8703487618306754e-07, 'epoch': 2.12} + 13%|█▎ | 8200/61904 [4:13:45<20:31:45, 1.38s/it] 13%|█▎ | 8201/61904 [4:13:47<20:44:12, 1.39s/it] 13%|█▎ | 8202/61904 [4:13:48<21:15:29, 1.43s/it] 13%|█▎ | 8203/61904 [4:13:49<20:24:41, 1.37s/it] 13%|█▎ | 8204/61904 [4:13:51<20:41:41, 1.39s/it] 13%|█▎ | 8205/61904 [4:13:52<20:38:09, 1.38s/it] 13%|█▎ | 8206/61904 [4:13:53<20:16:02, 1.36s/it] 13%|█▎ | 8207/61904 [4:13:55<20:04:46, 1.35s/it] 13%|█▎ | 8208/61904 [4:13:56<19:46:50, 1.33s/it] 13%|█▎ | 8209/61904 [4:13:57<19:52:11, 1.33s/it] 13%|█▎ | 8210/61904 [4:13:59<20:49:26, 1.40s/it] 13%|█▎ | 8211/61904 [4:14:00<20:34:06, 1.38s/it] 13%|█▎ | 8212/61904 [4:14:02<20:10:58, 1.35s/it] 13%|█▎ | 8213/61904 [4:14:03<20:17:46, 1.36s/it] 13%|█▎ | 8214/61904 [4:14:04<20:33:53, 1.38s/it] 13%|█▎ | 8215/61904 [4:14:06<21:22:17, 1.43s/it] 13%|█▎ | 8216/61904 [4:14:07<21:05:48, 1.41s/it] 13%|█▎ | 8217/61904 [4:14:09<20:51:43, 1.40s/it] 13%|█▎ | 8218/61904 [4:14:10<20:57:45, 1.41s/it] 13%|█▎ | 8219/61904 [4:14:11<20:27:43, 1.37s/it] 13%|█▎ | 8220/61904 [4:14:13<19:55:11, 1.34s/it] {'loss': 2.8964, 'learning_rate': 1.870024633735252e-07, 'epoch': 2.12} + 13%|█▎ | 8220/61904 [4:14:13<19:55:11, 1.34s/it] 13%|█▎ | 8221/61904 [4:14:14<20:01:06, 1.34s/it] 13%|█▎ | 8222/61904 [4:14:15<20:17:20, 1.36s/it] 13%|█▎ | 8223/61904 [4:14:17<20:12:03, 1.35s/it] 13%|█▎ | 8224/61904 [4:14:18<20:13:29, 1.36s/it] 13%|█▎ | 8225/61904 [4:14:19<20:44:23, 1.39s/it] 13%|█▎ | 8226/61904 [4:14:21<20:41:51, 1.39s/it] 13%|█▎ | 8227/61904 [4:14:22<20:29:37, 1.37s/it] 13%|█▎ | 8228/61904 [4:14:24<20:11:28, 1.35s/it] 13%|█▎ | 8229/61904 [4:14:25<20:49:40, 1.40s/it] 13%|█▎ | 8230/61904 [4:14:26<20:57:24, 1.41s/it] 13%|█▎ | 8231/61904 [4:14:28<20:47:22, 1.39s/it] 13%|█▎ | 8232/61904 [4:14:29<20:56:42, 1.40s/it] 13%|█▎ | 8233/61904 [4:14:31<20:43:11, 1.39s/it] 13%|█▎ | 8234/61904 [4:14:32<20:21:58, 1.37s/it] 13%|█▎ | 8235/61904 [4:14:33<20:20:29, 1.36s/it] 13%|█▎ | 8236/61904 [4:14:35<20:04:27, 1.35s/it] 13%|█▎ | 8237/61904 [4:14:36<20:20:36, 1.36s/it] 13%|█▎ | 8238/61904 [4:14:37<20:40:55, 1.39s/it] 13%|█▎ | 8239/61904 [4:14:39<20:28:50, 1.37s/it] 13%|█▎ | 8240/61904 [4:14:40<20:42:56, 1.39s/it] {'loss': 2.887, 'learning_rate': 1.8697005056398287e-07, 'epoch': 2.13} + 13%|█▎ | 8240/61904 [4:14:40<20:42:56, 1.39s/it] 13%|█▎ | 8241/61904 [4:14:42<21:13:20, 1.42s/it] 13%|█▎ | 8242/61904 [4:14:43<20:57:59, 1.41s/it] 13%|█▎ | 8243/61904 [4:14:44<20:11:31, 1.35s/it] 13%|█▎ | 8244/61904 [4:14:46<19:53:41, 1.33s/it] 13%|█▎ | 8245/61904 [4:14:47<19:59:46, 1.34s/it] 13%|█▎ | 8246/61904 [4:14:48<20:44:02, 1.39s/it] 13%|█▎ | 8247/61904 [4:14:50<20:40:36, 1.39s/it] 13%|█▎ | 8248/61904 [4:14:51<21:02:44, 1.41s/it] 13%|█▎ | 8249/61904 [4:14:53<21:19:49, 1.43s/it] 13%|█▎ | 8250/61904 [4:14:54<20:34:28, 1.38s/it] 13%|█▎ | 8251/61904 [4:14:56<21:17:21, 1.43s/it] 13%|█▎ | 8252/61904 [4:14:57<20:39:04, 1.39s/it] 13%|█▎ | 8253/61904 [4:14:58<20:44:52, 1.39s/it] 13%|█▎ | 8254/61904 [4:15:00<20:22:45, 1.37s/it] 13%|█▎ | 8255/61904 [4:15:01<21:01:11, 1.41s/it] 13%|█▎ | 8256/61904 [4:15:02<20:55:12, 1.40s/it] 13%|█▎ | 8257/61904 [4:15:04<20:15:31, 1.36s/it] 13%|█▎ | 8258/61904 [4:15:05<20:26:41, 1.37s/it] 13%|█▎ | 8259/61904 [4:15:07<20:31:17, 1.38s/it] 13%|█▎ | 8260/61904 [4:15:08<21:06:13, 1.42s/it] {'loss': 2.8915, 'learning_rate': 1.8693763775444056e-07, 'epoch': 2.13} + 13%|█▎ | 8260/61904 [4:15:08<21:06:13, 1.42s/it] 13%|█▎ | 8261/61904 [4:15:09<20:43:09, 1.39s/it] 13%|█▎ | 8262/61904 [4:15:11<21:28:23, 1.44s/it] 13%|█▎ | 8263/61904 [4:15:12<20:50:49, 1.40s/it] 13%|█▎ | 8264/61904 [4:15:14<20:37:46, 1.38s/it] 13%|█▎ | 8265/61904 [4:15:15<20:33:51, 1.38s/it] 13%|█▎ | 8266/61904 [4:15:16<21:08:03, 1.42s/it] 13%|█▎ | 8267/61904 [4:15:18<20:59:42, 1.41s/it] 13%|█▎ | 8268/61904 [4:15:19<21:57:25, 1.47s/it] 13%|█▎ | 8269/61904 [4:15:21<21:49:28, 1.46s/it] 13%|█▎ | 8270/61904 [4:15:22<21:28:35, 1.44s/it] 13%|█▎ | 8271/61904 [4:15:24<21:29:13, 1.44s/it] 13%|█▎ | 8272/61904 [4:15:25<21:13:24, 1.42s/it] 13%|█▎ | 8273/61904 [4:15:26<20:52:05, 1.40s/it] 13%|█▎ | 8274/61904 [4:15:28<21:44:27, 1.46s/it] 13%|█▎ | 8275/61904 [4:15:29<21:20:32, 1.43s/it] 13%|█▎ | 8276/61904 [4:15:31<21:13:19, 1.42s/it] 13%|█▎ | 8277/61904 [4:15:32<21:07:08, 1.42s/it] 13%|█▎ | 8278/61904 [4:15:34<21:03:37, 1.41s/it] 13%|█▎ | 8279/61904 [4:15:35<20:34:22, 1.38s/it] 13%|█▎ | 8280/61904 [4:15:36<20:34:23, 1.38s/it] {'loss': 2.9201, 'learning_rate': 1.8690522494489822e-07, 'epoch': 2.14} + 13%|█▎ | 8280/61904 [4:15:36<20:34:23, 1.38s/it] 13%|█▎ | 8281/61904 [4:15:38<20:30:57, 1.38s/it] 13%|█▎ | 8282/61904 [4:15:39<20:37:06, 1.38s/it] 13%|█▎ | 8283/61904 [4:15:41<21:00:54, 1.41s/it] 13%|█▎ | 8284/61904 [4:15:42<20:42:57, 1.39s/it] 13%|█▎ | 8285/61904 [4:15:43<20:40:58, 1.39s/it] 13%|█▎ | 8286/61904 [4:15:45<20:03:55, 1.35s/it] 13%|█▎ | 8287/61904 [4:15:46<19:52:36, 1.33s/it] 13%|█▎ | 8288/61904 [4:15:47<19:50:36, 1.33s/it] 13%|█▎ | 8289/61904 [4:15:49<20:04:00, 1.35s/it] 13%|█▎ | 8290/61904 [4:15:50<19:49:22, 1.33s/it] 13%|█▎ | 8291/61904 [4:15:51<19:26:51, 1.31s/it] 13%|█▎ | 8292/61904 [4:15:53<19:54:16, 1.34s/it] 13%|█▎ | 8293/61904 [4:15:54<20:08:08, 1.35s/it] 13%|█▎ | 8294/61904 [4:15:55<20:32:36, 1.38s/it] 13%|█▎ | 8295/61904 [4:15:57<20:05:28, 1.35s/it] 13%|█▎ | 8296/61904 [4:15:58<20:21:00, 1.37s/it] 13%|█▎ | 8297/61904 [4:15:59<20:11:34, 1.36s/it] 13%|█▎ | 8298/61904 [4:16:01<21:02:13, 1.41s/it] 13%|█▎ | 8299/61904 [4:16:02<21:18:06, 1.43s/it] 13%|█▎ | 8300/61904 [4:16:04<20:42:02, 1.39s/it] {'loss': 2.8616, 'learning_rate': 1.8687281213535588e-07, 'epoch': 2.14} + 13%|█▎ | 8300/61904 [4:16:04<20:42:02, 1.39s/it] 13%|█▎ | 8301/61904 [4:16:05<21:02:02, 1.41s/it] 13%|█▎ | 8302/61904 [4:16:07<20:48:53, 1.40s/it] 13%|█▎ | 8303/61904 [4:16:08<20:19:41, 1.37s/it] 13%|█▎ | 8304/61904 [4:16:09<21:05:59, 1.42s/it] 13%|█▎ | 8305/61904 [4:16:11<21:04:03, 1.42s/it] 13%|█▎ | 8306/61904 [4:16:12<20:46:10, 1.40s/it] 13%|█▎ | 8307/61904 [4:16:14<21:13:55, 1.43s/it] 13%|█▎ | 8308/61904 [4:16:15<20:10:25, 1.36s/it] 13%|█▎ | 8309/61904 [4:16:16<20:59:41, 1.41s/it] 13%|█▎ | 8310/61904 [4:16:18<21:11:56, 1.42s/it] 13%|█▎ | 8311/61904 [4:16:19<21:04:48, 1.42s/it] 13%|█▎ | 8312/61904 [4:16:20<20:35:08, 1.38s/it] 13%|█▎ | 8313/61904 [4:16:22<20:35:30, 1.38s/it] 13%|█▎ | 8314/61904 [4:16:23<20:21:11, 1.37s/it] 13%|█▎ | 8315/61904 [4:16:24<19:59:03, 1.34s/it] 13%|█▎ | 8316/61904 [4:16:26<20:17:31, 1.36s/it] 13%|█▎ | 8317/61904 [4:16:27<20:12:16, 1.36s/it] 13%|█▎ | 8318/61904 [4:16:29<20:12:40, 1.36s/it] 13%|█▎ | 8319/61904 [4:16:30<19:35:59, 1.32s/it] 13%|█▎ | 8320/61904 [4:16:31<19:46:21, 1.33s/it] {'loss': 2.8828, 'learning_rate': 1.8684039932581354e-07, 'epoch': 2.15} + 13%|█▎ | 8320/61904 [4:16:31<19:46:21, 1.33s/it] 13%|█▎ | 8321/61904 [4:16:32<19:41:38, 1.32s/it] 13%|█▎ | 8322/61904 [4:16:34<19:53:53, 1.34s/it] 13%|█▎ | 8323/61904 [4:16:35<20:28:05, 1.38s/it] 13%|█▎ | 8324/61904 [4:16:37<20:54:46, 1.41s/it] 13%|█▎ | 8325/61904 [4:16:38<21:06:31, 1.42s/it] 13%|█▎ | 8326/61904 [4:16:40<20:56:38, 1.41s/it] 13%|█▎ | 8327/61904 [4:16:41<21:08:35, 1.42s/it] 13%|█▎ | 8328/61904 [4:16:42<21:08:01, 1.42s/it] 13%|█▎ | 8329/61904 [4:16:44<21:01:58, 1.41s/it] 13%|█▎ | 8330/61904 [4:16:45<20:22:16, 1.37s/it] 13%|█▎ | 8331/61904 [4:16:46<20:10:01, 1.36s/it] 13%|█▎ | 8332/61904 [4:16:48<20:16:51, 1.36s/it] 13%|█▎ | 8333/61904 [4:16:49<20:27:38, 1.37s/it] 13%|█▎ | 8334/61904 [4:16:50<19:46:43, 1.33s/it] 13%|█▎ | 8335/61904 [4:16:52<19:18:14, 1.30s/it] 13%|█▎ | 8336/61904 [4:16:53<19:48:07, 1.33s/it] 13%|█▎ | 8337/61904 [4:16:54<19:33:43, 1.31s/it] 13%|█▎ | 8338/61904 [4:16:56<19:29:30, 1.31s/it] 13%|█▎ | 8339/61904 [4:16:57<19:57:22, 1.34s/it] 13%|█▎ | 8340/61904 [4:16:58<19:51:35, 1.33s/it] {'loss': 2.8614, 'learning_rate': 1.868079865162712e-07, 'epoch': 2.16} + 13%|█▎ | 8340/61904 [4:16:58<19:51:35, 1.33s/it] 13%|█▎ | 8341/61904 [4:17:00<19:37:51, 1.32s/it] 13%|█▎ | 8342/61904 [4:17:02<21:47:13, 1.46s/it] 13%|█▎ | 8343/61904 [4:17:03<22:14:03, 1.49s/it] 13%|█▎ | 8344/61904 [4:17:04<21:22:32, 1.44s/it] 13%|█▎ | 8345/61904 [4:17:06<21:26:00, 1.44s/it] 13%|█▎ | 8346/61904 [4:17:07<21:19:31, 1.43s/it] 13%|█▎ | 8347/61904 [4:17:09<20:58:07, 1.41s/it] 13%|█▎ | 8348/61904 [4:17:10<20:00:35, 1.35s/it] 13%|█▎ | 8349/61904 [4:17:11<20:50:44, 1.40s/it] 13%|█▎ | 8350/61904 [4:17:13<20:56:38, 1.41s/it] 13%|█▎ | 8351/61904 [4:17:14<21:31:11, 1.45s/it] 13%|█▎ | 8352/61904 [4:17:16<21:15:26, 1.43s/it] 13%|█▎ | 8353/61904 [4:17:17<21:33:29, 1.45s/it] 13%|█▎ | 8354/61904 [4:17:19<21:06:49, 1.42s/it] 13%|█▎ | 8355/61904 [4:17:20<20:52:41, 1.40s/it] 13%|█▎ | 8356/61904 [4:17:21<20:14:21, 1.36s/it] 13%|█▎ | 8357/61904 [4:17:22<19:59:09, 1.34s/it] 14%|█▎ | 8358/61904 [4:17:24<19:47:06, 1.33s/it] 14%|█▎ | 8359/61904 [4:17:25<19:45:20, 1.33s/it] 14%|█▎ | 8360/61904 [4:17:26<19:16:51, 1.30s/it] {'loss': 2.8295, 'learning_rate': 1.867755737067289e-07, 'epoch': 2.16} + 14%|█▎ | 8360/61904 [4:17:26<19:16:51, 1.30s/it] 14%|█▎ | 8361/61904 [4:17:28<19:55:24, 1.34s/it] 14%|█▎ | 8362/61904 [4:17:29<20:17:28, 1.36s/it] 14%|█▎ | 8363/61904 [4:17:30<19:47:02, 1.33s/it] 14%|█▎ | 8364/61904 [4:17:32<20:15:57, 1.36s/it] 14%|█▎ | 8365/61904 [4:17:33<20:28:01, 1.38s/it] 14%|█▎ | 8366/61904 [4:17:35<21:07:20, 1.42s/it] 14%|█▎ | 8367/61904 [4:17:36<21:04:24, 1.42s/it] 14%|█▎ | 8368/61904 [4:17:38<20:50:37, 1.40s/it] 14%|█▎ | 8369/61904 [4:17:39<20:47:55, 1.40s/it] 14%|█▎ | 8370/61904 [4:17:40<20:25:28, 1.37s/it] 14%|█▎ | 8371/61904 [4:17:41<19:48:18, 1.33s/it] 14%|█▎ | 8372/61904 [4:17:43<21:01:25, 1.41s/it] 14%|█▎ | 8373/61904 [4:17:44<20:07:21, 1.35s/it] 14%|█▎ | 8374/61904 [4:17:46<19:36:50, 1.32s/it] 14%|█▎ | 8375/61904 [4:17:47<20:31:10, 1.38s/it] 14%|█▎ | 8376/61904 [4:17:48<20:19:16, 1.37s/it] 14%|█▎ | 8377/61904 [4:17:50<20:39:46, 1.39s/it] 14%|█▎ | 8378/61904 [4:17:51<20:51:45, 1.40s/it] 14%|█▎ | 8379/61904 [4:17:53<21:49:46, 1.47s/it] 14%|█▎ | 8380/61904 [4:17:54<21:55:43, 1.47s/it] {'loss': 2.8375, 'learning_rate': 1.8674316089718655e-07, 'epoch': 2.17} + 14%|█▎ | 8380/61904 [4:17:54<21:55:43, 1.47s/it] 14%|█▎ | 8381/61904 [4:17:56<21:41:50, 1.46s/it] 14%|█▎ | 8382/61904 [4:17:57<21:02:19, 1.42s/it] 14%|█▎ | 8383/61904 [4:17:58<20:42:18, 1.39s/it] 14%|█▎ | 8384/61904 [4:18:00<20:11:55, 1.36s/it] 14%|█▎ | 8385/61904 [4:18:01<20:15:59, 1.36s/it] 14%|█▎ | 8386/61904 [4:18:02<20:05:34, 1.35s/it] 14%|█▎ | 8387/61904 [4:18:04<20:13:39, 1.36s/it] 14%|█▎ | 8388/61904 [4:18:05<20:46:05, 1.40s/it] 14%|█▎ | 8389/61904 [4:18:07<20:27:36, 1.38s/it] 14%|█▎ | 8390/61904 [4:18:08<19:58:55, 1.34s/it] 14%|█▎ | 8391/61904 [4:18:09<19:53:01, 1.34s/it] 14%|█▎ | 8392/61904 [4:18:11<19:47:47, 1.33s/it] 14%|█▎ | 8393/61904 [4:18:12<19:30:10, 1.31s/it] 14%|█▎ | 8394/61904 [4:18:13<20:10:18, 1.36s/it] 14%|█▎ | 8395/61904 [4:18:15<20:09:13, 1.36s/it] 14%|█▎ | 8396/61904 [4:18:16<19:58:41, 1.34s/it] 14%|█▎ | 8397/61904 [4:18:17<19:47:33, 1.33s/it] 14%|█▎ | 8398/61904 [4:18:19<21:02:58, 1.42s/it] 14%|█▎ | 8399/61904 [4:18:20<21:41:14, 1.46s/it] 14%|█▎ | 8400/61904 [4:18:22<20:28:52, 1.38s/it] {'loss': 2.844, 'learning_rate': 1.8671074808764422e-07, 'epoch': 2.17} + 14%|█▎ | 8400/61904 [4:18:22<20:28:52, 1.38s/it] 14%|█▎ | 8401/61904 [4:18:23<19:56:54, 1.34s/it] 14%|█▎ | 8402/61904 [4:18:24<19:44:42, 1.33s/it] 14%|█▎ | 8403/61904 [4:18:25<19:29:14, 1.31s/it] 14%|█▎ | 8404/61904 [4:18:27<19:24:14, 1.31s/it] 14%|█▎ | 8405/61904 [4:18:28<19:37:46, 1.32s/it] 14%|█▎ | 8406/61904 [4:18:29<19:17:29, 1.30s/it] 14%|█▎ | 8407/61904 [4:18:31<20:21:17, 1.37s/it] 14%|█▎ | 8408/61904 [4:18:32<20:15:46, 1.36s/it] 14%|█▎ | 8409/61904 [4:18:34<19:56:46, 1.34s/it] 14%|█▎ | 8410/61904 [4:18:35<19:44:37, 1.33s/it] 14%|█▎ | 8411/61904 [4:18:36<20:07:31, 1.35s/it] 14%|█▎ | 8412/61904 [4:18:38<20:21:38, 1.37s/it] 14%|█▎ | 8413/61904 [4:18:39<20:16:13, 1.36s/it] 14%|█▎ | 8414/61904 [4:18:40<20:01:37, 1.35s/it] 14%|█▎ | 8415/61904 [4:18:42<20:32:38, 1.38s/it] 14%|█▎ | 8416/61904 [4:18:43<20:47:22, 1.40s/it] 14%|█▎ | 8417/61904 [4:18:44<20:02:53, 1.35s/it] 14%|█▎ | 8418/61904 [4:18:46<19:39:22, 1.32s/it] 14%|█▎ | 8419/61904 [4:18:47<20:36:42, 1.39s/it] 14%|█▎ | 8420/61904 [4:18:49<20:40:19, 1.39s/it] {'loss': 2.8151, 'learning_rate': 1.866783352781019e-07, 'epoch': 2.18} + 14%|█▎ | 8420/61904 [4:18:49<20:40:19, 1.39s/it] 14%|█▎ | 8421/61904 [4:18:50<21:07:53, 1.42s/it] 14%|█▎ | 8422/61904 [4:18:51<20:19:39, 1.37s/it] 14%|█▎ | 8423/61904 [4:18:53<20:25:18, 1.37s/it] 14%|█▎ | 8424/61904 [4:18:54<19:56:22, 1.34s/it] 14%|█▎ | 8425/61904 [4:18:56<20:41:13, 1.39s/it] 14%|█▎ | 8426/61904 [4:18:57<20:11:02, 1.36s/it] 14%|█▎ | 8427/61904 [4:18:58<20:49:40, 1.40s/it] 14%|█▎ | 8428/61904 [4:19:00<20:04:17, 1.35s/it] 14%|█▎ | 8429/61904 [4:19:01<19:43:12, 1.33s/it] 14%|█▎ | 8430/61904 [4:19:02<20:50:42, 1.40s/it] 14%|█▎ | 8431/61904 [4:19:04<20:31:42, 1.38s/it] 14%|█▎ | 8432/61904 [4:19:05<19:59:17, 1.35s/it] 14%|█▎ | 8433/61904 [4:19:06<20:24:13, 1.37s/it] 14%|█▎ | 8434/61904 [4:19:08<20:38:59, 1.39s/it] 14%|█▎ | 8435/61904 [4:19:09<21:19:38, 1.44s/it] 14%|█▎ | 8436/61904 [4:19:11<21:01:30, 1.42s/it] 14%|█▎ | 8437/61904 [4:19:12<20:08:05, 1.36s/it] 14%|█▎ | 8438/61904 [4:19:13<20:27:56, 1.38s/it] 14%|█▎ | 8439/61904 [4:19:15<20:22:39, 1.37s/it] 14%|█▎ | 8440/61904 [4:19:16<19:59:10, 1.35s/it] {'loss': 2.8761, 'learning_rate': 1.8664592246855957e-07, 'epoch': 2.18} + 14%|█▎ | 8440/61904 [4:19:16<19:59:10, 1.35s/it] 14%|█▎ | 8441/61904 [4:19:18<20:29:26, 1.38s/it] 14%|█▎ | 8442/61904 [4:19:19<20:25:36, 1.38s/it] 14%|█▎ | 8443/61904 [4:19:20<20:27:33, 1.38s/it] 14%|█▎ | 8444/61904 [4:19:22<20:26:33, 1.38s/it] 14%|█▎ | 8445/61904 [4:19:23<20:18:54, 1.37s/it] 14%|█▎ | 8446/61904 [4:19:24<20:01:44, 1.35s/it] 14%|█▎ | 8447/61904 [4:19:25<19:24:25, 1.31s/it] 14%|█▎ | 8448/61904 [4:19:27<19:21:38, 1.30s/it] 14%|█▎ | 8449/61904 [4:19:28<19:07:55, 1.29s/it] 14%|█▎ | 8450/61904 [4:19:29<19:21:56, 1.30s/it] 14%|█▎ | 8451/61904 [4:19:31<19:35:34, 1.32s/it] 14%|█▎ | 8452/61904 [4:19:32<19:38:20, 1.32s/it] 14%|█▎ | 8453/61904 [4:19:34<20:25:25, 1.38s/it] 14%|█▎ | 8454/61904 [4:19:35<21:04:03, 1.42s/it] 14%|█▎ | 8455/61904 [4:19:36<20:33:33, 1.38s/it] 14%|█▎ | 8456/61904 [4:19:38<20:29:41, 1.38s/it] 14%|█▎ | 8457/61904 [4:19:39<19:41:53, 1.33s/it] 14%|█▎ | 8458/61904 [4:19:40<20:02:37, 1.35s/it] 14%|█▎ | 8459/61904 [4:19:42<20:18:04, 1.37s/it] 14%|█▎ | 8460/61904 [4:19:43<20:26:33, 1.38s/it] {'loss': 2.9129, 'learning_rate': 1.8661350965901723e-07, 'epoch': 2.19} + 14%|█▎ | 8460/61904 [4:19:43<20:26:33, 1.38s/it] 14%|█▎ | 8461/61904 [4:19:44<19:51:21, 1.34s/it] 14%|█▎ | 8462/61904 [4:19:46<19:46:02, 1.33s/it] 14%|█▎ | 8463/61904 [4:19:47<20:21:21, 1.37s/it] 14%|█▎ | 8464/61904 [4:19:48<19:42:26, 1.33s/it] 14%|█▎ | 8465/61904 [4:19:50<20:01:30, 1.35s/it] 14%|█▎ | 8466/61904 [4:19:51<20:21:41, 1.37s/it] 14%|█▎ | 8467/61904 [4:19:53<20:33:51, 1.39s/it] 14%|█▎ | 8468/61904 [4:19:54<20:09:31, 1.36s/it] 14%|█▎ | 8469/61904 [4:19:55<20:13:51, 1.36s/it] 14%|█▎ | 8470/61904 [4:19:57<19:56:09, 1.34s/it] 14%|█▎ | 8471/61904 [4:19:58<20:54:11, 1.41s/it] 14%|█▎ | 8472/61904 [4:20:00<21:02:54, 1.42s/it] 14%|█▎ | 8473/61904 [4:20:01<20:08:54, 1.36s/it] 14%|█▎ | 8474/61904 [4:20:02<20:02:22, 1.35s/it] 14%|█▎ | 8475/61904 [4:20:04<20:13:33, 1.36s/it] 14%|█▎ | 8476/61904 [4:20:05<20:09:38, 1.36s/it] 14%|█▎ | 8477/61904 [4:20:06<19:57:11, 1.34s/it] 14%|█▎ | 8478/61904 [4:20:08<19:53:15, 1.34s/it] 14%|█▎ | 8479/61904 [4:20:09<19:53:55, 1.34s/it] 14%|█▎ | 8480/61904 [4:20:10<19:48:00, 1.33s/it] {'loss': 2.8907, 'learning_rate': 1.8658109684947492e-07, 'epoch': 2.19} + 14%|█▎ | 8480/61904 [4:20:10<19:48:00, 1.33s/it] 14%|█▎ | 8481/61904 [4:20:12<20:42:28, 1.40s/it] 14%|█▎ | 8482/61904 [4:20:13<20:41:54, 1.39s/it] 14%|█▎ | 8483/61904 [4:20:15<20:31:08, 1.38s/it] 14%|█▎ | 8484/61904 [4:20:16<19:50:38, 1.34s/it] 14%|█▎ | 8485/61904 [4:20:17<19:37:27, 1.32s/it] 14%|█▎ | 8486/61904 [4:20:18<19:16:39, 1.30s/it] 14%|█▎ | 8487/61904 [4:20:20<19:55:06, 1.34s/it] 14%|█▎ | 8488/61904 [4:20:21<19:21:42, 1.30s/it] 14%|█▎ | 8489/61904 [4:20:22<19:41:20, 1.33s/it] 14%|█▎ | 8490/61904 [4:20:24<19:49:52, 1.34s/it] 14%|█▎ | 8491/61904 [4:20:25<19:59:54, 1.35s/it] 14%|█▎ | 8492/61904 [4:20:26<19:45:18, 1.33s/it] 14%|█▎ | 8493/61904 [4:20:28<20:15:45, 1.37s/it] 14%|█▎ | 8494/61904 [4:20:29<20:29:27, 1.38s/it] 14%|█▎ | 8495/61904 [4:20:31<20:22:09, 1.37s/it] 14%|█▎ | 8496/61904 [4:20:32<21:08:34, 1.43s/it] 14%|█▎ | 8497/61904 [4:20:33<20:39:51, 1.39s/it] 14%|█▎ | 8498/61904 [4:20:35<20:56:05, 1.41s/it] 14%|█▎ | 8499/61904 [4:20:36<21:22:05, 1.44s/it] 14%|█▎ | 8500/61904 [4:20:38<20:33:06, 1.39s/it] {'loss': 2.9062, 'learning_rate': 1.8654868403993255e-07, 'epoch': 2.2} + 14%|█▎ | 8500/61904 [4:20:38<20:33:06, 1.39s/it] 14%|█▎ | 8501/61904 [4:20:39<19:41:54, 1.33s/it] 14%|█▎ | 8502/61904 [4:20:40<19:44:45, 1.33s/it] 14%|█▎ | 8503/61904 [4:20:42<20:19:15, 1.37s/it] 14%|█▎ | 8504/61904 [4:20:43<20:05:30, 1.35s/it] 14%|█▎ | 8505/61904 [4:20:44<20:03:11, 1.35s/it] 14%|█▎ | 8506/61904 [4:20:46<20:14:36, 1.36s/it] 14%|█▎ | 8507/61904 [4:20:47<19:49:44, 1.34s/it] 14%|█▎ | 8508/61904 [4:20:48<19:47:51, 1.33s/it] 14%|█▎ | 8509/61904 [4:20:50<19:36:16, 1.32s/it] 14%|█▎ | 8510/61904 [4:20:51<20:00:48, 1.35s/it] 14%|█▎ | 8511/61904 [4:20:52<19:47:10, 1.33s/it] 14%|█▍ | 8512/61904 [4:20:54<19:20:03, 1.30s/it] 14%|█▍ | 8513/61904 [4:20:55<19:27:05, 1.31s/it] 14%|█▍ | 8514/61904 [4:20:56<19:43:58, 1.33s/it] 14%|█▍ | 8515/61904 [4:20:58<19:45:30, 1.33s/it] 14%|█▍ | 8516/61904 [4:20:59<19:44:44, 1.33s/it] 14%|█▍ | 8517/61904 [4:21:00<20:16:37, 1.37s/it] 14%|█▍ | 8518/61904 [4:21:02<20:29:15, 1.38s/it] 14%|█▍ | 8519/61904 [4:21:03<20:00:00, 1.35s/it] 14%|█▍ | 8520/61904 [4:21:04<19:48:13, 1.34s/it] {'loss': 2.9185, 'learning_rate': 1.8651627123039024e-07, 'epoch': 2.2} + 14%|█▍ | 8520/61904 [4:21:04<19:48:13, 1.34s/it] 14%|█▍ | 8521/61904 [4:21:06<20:29:26, 1.38s/it] 14%|█▍ | 8522/61904 [4:21:07<19:59:14, 1.35s/it] 14%|█▍ | 8523/61904 [4:21:08<19:58:58, 1.35s/it] 14%|█▍ | 8524/61904 [4:21:10<19:57:29, 1.35s/it] 14%|█▍ | 8525/61904 [4:21:11<19:11:02, 1.29s/it] 14%|█▍ | 8526/61904 [4:21:12<19:09:53, 1.29s/it] 14%|█▍ | 8527/61904 [4:21:14<19:44:46, 1.33s/it] 14%|█▍ | 8528/61904 [4:21:15<19:28:18, 1.31s/it] 14%|█▍ | 8529/61904 [4:21:16<19:40:37, 1.33s/it] 14%|█▍ | 8530/61904 [4:21:18<19:24:53, 1.31s/it] 14%|█▍ | 8531/61904 [4:21:19<20:27:53, 1.38s/it] 14%|█▍ | 8532/61904 [4:21:20<20:22:01, 1.37s/it] 14%|█▍ | 8533/61904 [4:21:22<20:11:28, 1.36s/it] 14%|█▍ | 8534/61904 [4:21:23<20:07:06, 1.36s/it] 14%|█▍ | 8535/61904 [4:21:24<20:00:05, 1.35s/it] 14%|█▍ | 8536/61904 [4:21:26<20:02:55, 1.35s/it] 14%|█▍ | 8537/61904 [4:21:27<19:56:14, 1.34s/it] 14%|█▍ | 8538/61904 [4:21:28<19:41:03, 1.33s/it] 14%|█▍ | 8539/61904 [4:21:30<19:16:53, 1.30s/it] 14%|█▍ | 8540/61904 [4:21:31<19:06:12, 1.29s/it] {'loss': 2.9334, 'learning_rate': 1.864838584208479e-07, 'epoch': 2.21} + 14%|█▍ | 8540/61904 [4:21:31<19:06:12, 1.29s/it] 14%|█▍ | 8541/61904 [4:21:32<19:37:58, 1.32s/it] 14%|█▍ | 8542/61904 [4:21:34<20:27:47, 1.38s/it] 14%|█▍ | 8543/61904 [4:21:35<21:13:28, 1.43s/it] 14%|█▍ | 8544/61904 [4:21:37<20:54:42, 1.41s/it] 14%|█▍ | 8545/61904 [4:21:38<20:56:26, 1.41s/it] 14%|█▍ | 8546/61904 [4:21:40<21:02:26, 1.42s/it] 14%|█▍ | 8547/61904 [4:21:41<21:11:43, 1.43s/it] 14%|█▍ | 8548/61904 [4:21:43<21:04:22, 1.42s/it] 14%|█▍ | 8549/61904 [4:21:44<22:23:51, 1.51s/it] 14%|█▍ | 8550/61904 [4:21:46<21:24:50, 1.44s/it] 14%|█▍ | 8551/61904 [4:21:47<20:38:09, 1.39s/it] 14%|█▍ | 8552/61904 [4:21:48<19:55:00, 1.34s/it] 14%|█▍ | 8553/61904 [4:21:49<20:20:57, 1.37s/it] 14%|█▍ | 8554/61904 [4:21:51<20:14:21, 1.37s/it] 14%|█▍ | 8555/61904 [4:21:52<20:31:38, 1.39s/it] 14%|█▍ | 8556/61904 [4:21:54<21:05:17, 1.42s/it] 14%|█▍ | 8557/61904 [4:21:55<20:18:52, 1.37s/it] 14%|█▍ | 8558/61904 [4:21:56<20:23:20, 1.38s/it] 14%|█▍ | 8559/61904 [4:21:58<20:12:32, 1.36s/it] 14%|█▍ | 8560/61904 [4:21:59<19:42:54, 1.33s/it] {'loss': 2.8598, 'learning_rate': 1.8645144561130556e-07, 'epoch': 2.21} + 14%|█▍ | 8560/61904 [4:21:59<19:42:54, 1.33s/it] 14%|█▍ | 8561/61904 [4:22:00<19:59:49, 1.35s/it] 14%|█▍ | 8562/61904 [4:22:02<19:44:06, 1.33s/it] 14%|█▍ | 8563/61904 [4:22:03<19:58:23, 1.35s/it] 14%|█▍ | 8564/61904 [4:22:04<19:54:00, 1.34s/it] 14%|█▍ | 8565/61904 [4:22:06<19:45:04, 1.33s/it] 14%|█▍ | 8566/61904 [4:22:07<19:47:01, 1.34s/it] 14%|█▍ | 8567/61904 [4:22:08<19:55:16, 1.34s/it] 14%|█▍ | 8568/61904 [4:22:10<19:57:30, 1.35s/it] 14%|█▍ | 8569/61904 [4:22:11<19:46:47, 1.34s/it] 14%|█▍ | 8570/61904 [4:22:12<19:54:10, 1.34s/it] 14%|█▍ | 8571/61904 [4:22:14<20:06:20, 1.36s/it] 14%|█▍ | 8572/61904 [4:22:15<20:37:07, 1.39s/it] 14%|█▍ | 8573/61904 [4:22:17<20:14:25, 1.37s/it] 14%|█▍ | 8574/61904 [4:22:18<20:18:06, 1.37s/it] 14%|█▍ | 8575/61904 [4:22:19<19:51:50, 1.34s/it] 14%|█▍ | 8576/61904 [4:22:21<19:52:33, 1.34s/it] 14%|█▍ | 8577/61904 [4:22:22<20:22:17, 1.38s/it] 14%|█▍ | 8578/61904 [4:22:23<20:27:24, 1.38s/it] 14%|█▍ | 8579/61904 [4:22:25<21:11:05, 1.43s/it] 14%|█▍ | 8580/61904 [4:22:26<21:08:34, 1.43s/it] {'loss': 2.8341, 'learning_rate': 1.8641903280176325e-07, 'epoch': 2.22} + 14%|█▍ | 8580/61904 [4:22:26<21:08:34, 1.43s/it] 14%|█▍ | 8581/61904 [4:22:28<22:01:18, 1.49s/it] 14%|█▍ | 8582/61904 [4:22:29<21:46:18, 1.47s/it] 14%|█▍ | 8583/61904 [4:22:31<21:25:22, 1.45s/it] 14%|█▍ | 8584/61904 [4:22:32<21:02:30, 1.42s/it] 14%|█▍ | 8585/61904 [4:22:34<20:41:15, 1.40s/it] 14%|█▍ | 8586/61904 [4:22:35<20:33:36, 1.39s/it] 14%|█▍ | 8587/61904 [4:22:36<20:28:29, 1.38s/it] 14%|█▍ | 8588/61904 [4:22:38<20:32:12, 1.39s/it] 14%|█▍ | 8589/61904 [4:22:39<20:07:47, 1.36s/it] 14%|█▍ | 8590/61904 [4:22:40<20:16:22, 1.37s/it] 14%|█▍ | 8591/61904 [4:22:42<20:40:18, 1.40s/it] 14%|█▍ | 8592/61904 [4:22:43<20:24:36, 1.38s/it] 14%|█▍ | 8593/61904 [4:22:44<20:12:46, 1.36s/it] 14%|█▍ | 8594/61904 [4:22:46<20:03:23, 1.35s/it] 14%|█▍ | 8595/61904 [4:22:47<19:41:58, 1.33s/it] 14%|█▍ | 8596/61904 [4:22:49<20:26:09, 1.38s/it] 14%|█▍ | 8597/61904 [4:22:50<19:59:04, 1.35s/it] 14%|█▍ | 8598/61904 [4:22:51<20:08:08, 1.36s/it] 14%|█▍ | 8599/61904 [4:22:53<21:14:26, 1.43s/it] 14%|█▍ | 8600/61904 [4:22:54<20:21:04, 1.37s/it] {'loss': 2.9006, 'learning_rate': 1.8638661999222091e-07, 'epoch': 2.22} + 14%|█▍ | 8600/61904 [4:22:54<20:21:04, 1.37s/it] 14%|█▍ | 8601/61904 [4:22:55<20:13:12, 1.37s/it] 14%|█▍ | 8602/61904 [4:22:57<20:30:07, 1.38s/it] 14%|█▍ | 8603/61904 [4:22:58<20:24:11, 1.38s/it] 14%|█▍ | 8604/61904 [4:22:59<19:36:01, 1.32s/it] 14%|█▍ | 8605/61904 [4:23:01<19:50:25, 1.34s/it] 14%|█▍ | 8606/61904 [4:23:02<19:38:21, 1.33s/it] 14%|█▍ | 8607/61904 [4:23:04<20:20:56, 1.37s/it] 14%|█▍ | 8608/61904 [4:23:05<20:04:05, 1.36s/it] 14%|█▍ | 8609/61904 [4:23:06<19:55:10, 1.35s/it] 14%|█▍ | 8610/61904 [4:23:08<20:07:54, 1.36s/it] 14%|█▍ | 8611/61904 [4:23:09<19:40:52, 1.33s/it] 14%|█▍ | 8612/61904 [4:23:10<19:29:22, 1.32s/it] 14%|█▍ | 8613/61904 [4:23:12<19:35:13, 1.32s/it] 14%|█▍ | 8614/61904 [4:23:13<20:10:04, 1.36s/it] 14%|█▍ | 8615/61904 [4:23:14<19:51:46, 1.34s/it] 14%|█▍ | 8616/61904 [4:23:16<19:30:17, 1.32s/it] 14%|█▍ | 8617/61904 [4:23:17<19:12:42, 1.30s/it] 14%|█▍ | 8618/61904 [4:23:18<20:03:13, 1.35s/it] 14%|█▍ | 8619/61904 [4:23:20<19:42:14, 1.33s/it] 14%|█▍ | 8620/61904 [4:23:21<19:27:15, 1.31s/it] {'loss': 2.8851, 'learning_rate': 1.8635420718267858e-07, 'epoch': 2.23} + 14%|█▍ | 8620/61904 [4:23:21<19:27:15, 1.31s/it] 14%|█▍ | 8621/61904 [4:23:22<19:10:55, 1.30s/it] 14%|█▍ | 8622/61904 [4:23:23<19:32:48, 1.32s/it] 14%|█▍ | 8623/61904 [4:23:25<19:34:14, 1.32s/it] 14%|█▍ | 8624/61904 [4:23:26<20:00:05, 1.35s/it] 14%|█▍ | 8625/61904 [4:23:28<19:52:34, 1.34s/it] 14%|█▍ | 8626/61904 [4:23:29<19:48:14, 1.34s/it] 14%|█▍ | 8627/61904 [4:23:30<19:44:40, 1.33s/it] 14%|█▍ | 8628/61904 [4:23:32<19:58:55, 1.35s/it] 14%|█▍ | 8629/61904 [4:23:33<20:24:50, 1.38s/it] 14%|█▍ | 8630/61904 [4:23:34<19:53:56, 1.34s/it] 14%|█▍ | 8631/61904 [4:23:36<19:38:00, 1.33s/it] 14%|█▍ | 8632/61904 [4:23:37<19:16:29, 1.30s/it] 14%|█▍ | 8633/61904 [4:23:38<19:56:40, 1.35s/it] 14%|█▍ | 8634/61904 [4:23:40<20:42:27, 1.40s/it] 14%|█▍ | 8635/61904 [4:23:41<20:30:31, 1.39s/it] 14%|█▍ | 8636/61904 [4:23:42<20:11:48, 1.36s/it] 14%|█▍ | 8637/61904 [4:23:44<20:31:40, 1.39s/it] 14%|█▍ | 8638/61904 [4:23:45<21:04:23, 1.42s/it] 14%|█▍ | 8639/61904 [4:23:47<21:03:35, 1.42s/it] 14%|█▍ | 8640/61904 [4:23:48<21:13:19, 1.43s/it] {'loss': 2.8807, 'learning_rate': 1.8632179437313626e-07, 'epoch': 2.23} + 14%|█▍ | 8640/61904 [4:23:48<21:13:19, 1.43s/it] 14%|█▍ | 8641/61904 [4:23:50<20:59:51, 1.42s/it] 14%|█▍ | 8642/61904 [4:23:51<20:36:49, 1.39s/it] 14%|█▍ | 8643/61904 [4:23:52<20:24:02, 1.38s/it] 14%|█▍ | 8644/61904 [4:23:54<19:59:24, 1.35s/it] 14%|█▍ | 8645/61904 [4:23:55<20:11:01, 1.36s/it] 14%|█▍ | 8646/61904 [4:23:56<20:25:15, 1.38s/it] 14%|█▍ | 8647/61904 [4:23:58<21:09:59, 1.43s/it] 14%|█▍ | 8648/61904 [4:23:59<20:40:46, 1.40s/it] 14%|█▍ | 8649/61904 [4:24:01<20:00:22, 1.35s/it] 14%|█▍ | 8650/61904 [4:24:02<19:47:23, 1.34s/it] 14%|█▍ | 8651/61904 [4:24:03<20:22:23, 1.38s/it] 14%|█▍ | 8652/61904 [4:24:05<20:21:49, 1.38s/it] 14%|█▍ | 8653/61904 [4:24:06<19:58:50, 1.35s/it] 14%|█▍ | 8654/61904 [4:24:07<19:51:04, 1.34s/it] 14%|█▍ | 8655/61904 [4:24:09<19:38:23, 1.33s/it] 14%|█▍ | 8656/61904 [4:24:10<19:01:01, 1.29s/it] 14%|█▍ | 8657/61904 [4:24:11<19:04:37, 1.29s/it] 14%|█▍ | 8658/61904 [4:24:12<19:09:51, 1.30s/it] 14%|█▍ | 8659/61904 [4:24:14<19:25:34, 1.31s/it] 14%|█▍ | 8660/61904 [4:24:15<19:32:07, 1.32s/it] {'loss': 2.8064, 'learning_rate': 1.8628938156359393e-07, 'epoch': 2.24} + 14%|█▍ | 8660/61904 [4:24:15<19:32:07, 1.32s/it] 14%|█▍ | 8661/61904 [4:24:16<19:35:49, 1.33s/it] 14%|█▍ | 8662/61904 [4:24:18<18:57:21, 1.28s/it] 14%|█▍ | 8663/61904 [4:24:19<19:45:23, 1.34s/it] 14%|█▍ | 8664/61904 [4:24:20<19:24:43, 1.31s/it] 14%|█▍ | 8665/61904 [4:24:22<20:11:05, 1.36s/it] 14%|█▍ | 8666/61904 [4:24:23<19:43:52, 1.33s/it] 14%|█▍ | 8667/61904 [4:24:24<19:46:25, 1.34s/it] 14%|█▍ | 8668/61904 [4:24:26<20:07:16, 1.36s/it] 14%|█▍ | 8669/61904 [4:24:27<20:08:33, 1.36s/it] 14%|█▍ | 8670/61904 [4:24:29<20:07:48, 1.36s/it] 14%|█▍ | 8671/61904 [4:24:30<19:55:38, 1.35s/it] 14%|█▍ | 8672/61904 [4:24:31<19:48:48, 1.34s/it] 14%|█▍ | 8673/61904 [4:24:33<20:10:48, 1.36s/it] 14%|█▍ | 8674/61904 [4:24:34<19:58:47, 1.35s/it] 14%|█▍ | 8675/61904 [4:24:35<20:21:39, 1.38s/it] 14%|█▍ | 8676/61904 [4:24:37<20:28:43, 1.39s/it] 14%|█▍ | 8677/61904 [4:24:38<20:17:18, 1.37s/it] 14%|█▍ | 8678/61904 [4:24:39<20:00:40, 1.35s/it] 14%|█▍ | 8679/61904 [4:24:41<20:37:34, 1.40s/it] 14%|█▍ | 8680/61904 [4:24:42<20:34:50, 1.39s/it] {'loss': 2.89, 'learning_rate': 1.862569687540516e-07, 'epoch': 2.24} + 14%|█▍ | 8680/61904 [4:24:42<20:34:50, 1.39s/it] 14%|█▍ | 8681/61904 [4:24:44<20:11:19, 1.37s/it] 14%|█▍ | 8682/61904 [4:24:45<19:46:21, 1.34s/it] 14%|█▍ | 8683/61904 [4:24:46<20:06:39, 1.36s/it] 14%|█▍ | 8684/61904 [4:24:48<19:50:04, 1.34s/it] 14%|█▍ | 8685/61904 [4:24:49<19:31:59, 1.32s/it] 14%|█▍ | 8686/61904 [4:24:50<19:37:20, 1.33s/it] 14%|█▍ | 8687/61904 [4:24:51<19:24:31, 1.31s/it] 14%|█▍ | 8688/61904 [4:24:53<20:26:34, 1.38s/it] 14%|█▍ | 8689/61904 [4:24:54<20:08:45, 1.36s/it] 14%|█▍ | 8690/61904 [4:24:56<20:11:31, 1.37s/it] 14%|█▍ | 8691/61904 [4:24:57<19:45:50, 1.34s/it] 14%|█▍ | 8692/61904 [4:24:58<19:28:28, 1.32s/it] 14%|█▍ | 8693/61904 [4:25:00<19:38:07, 1.33s/it] 14%|█▍ | 8694/61904 [4:25:01<20:10:51, 1.37s/it] 14%|█▍ | 8695/61904 [4:25:03<20:35:10, 1.39s/it] 14%|█▍ | 8696/61904 [4:25:04<20:10:56, 1.37s/it] 14%|█▍ | 8697/61904 [4:25:05<19:45:17, 1.34s/it] 14%|█▍ | 8698/61904 [4:25:06<19:59:11, 1.35s/it] 14%|█▍ | 8699/61904 [4:25:08<20:25:20, 1.38s/it] 14%|█▍ | 8700/61904 [4:25:09<20:53:01, 1.41s/it] {'loss': 2.8585, 'learning_rate': 1.8622455594450928e-07, 'epoch': 2.25} + 14%|█▍ | 8700/61904 [4:25:09<20:53:01, 1.41s/it] 14%|█▍ | 8701/61904 [4:25:11<20:30:35, 1.39s/it] 14%|█▍ | 8702/61904 [4:25:12<19:50:57, 1.34s/it] 14%|█▍ | 8703/61904 [4:25:13<19:37:08, 1.33s/it] 14%|█▍ | 8704/61904 [4:25:15<20:21:30, 1.38s/it] 14%|█▍ | 8705/61904 [4:25:16<20:16:55, 1.37s/it] 14%|█▍ | 8706/61904 [4:25:18<20:20:44, 1.38s/it] 14%|█▍ | 8707/61904 [4:25:19<19:55:18, 1.35s/it] 14%|█▍ | 8708/61904 [4:25:20<19:59:40, 1.35s/it] 14%|█▍ | 8709/61904 [4:25:21<19:37:47, 1.33s/it] 14%|█▍ | 8710/61904 [4:25:23<20:02:25, 1.36s/it] 14%|█▍ | 8711/61904 [4:25:24<20:29:54, 1.39s/it] 14%|█▍ | 8712/61904 [4:25:26<20:24:51, 1.38s/it] 14%|█▍ | 8713/61904 [4:25:27<19:48:34, 1.34s/it] 14%|█▍ | 8714/61904 [4:25:28<20:31:22, 1.39s/it] 14%|█▍ | 8715/61904 [4:25:30<19:44:21, 1.34s/it] 14%|█▍ | 8716/61904 [4:25:31<19:27:57, 1.32s/it] 14%|█▍ | 8717/61904 [4:25:32<19:22:29, 1.31s/it] 14%|█▍ | 8718/61904 [4:25:34<20:09:31, 1.36s/it] 14%|█▍ | 8719/61904 [4:25:35<20:24:55, 1.38s/it] 14%|█▍ | 8720/61904 [4:25:37<20:34:59, 1.39s/it] {'loss': 2.8825, 'learning_rate': 1.861921431349669e-07, 'epoch': 2.25} + 14%|█▍ | 8720/61904 [4:25:37<20:34:59, 1.39s/it] 14%|█▍ | 8721/61904 [4:25:38<20:24:26, 1.38s/it] 14%|█▍ | 8722/61904 [4:25:39<19:59:58, 1.35s/it] 14%|█▍ | 8723/61904 [4:25:40<19:38:48, 1.33s/it] 14%|█▍ | 8724/61904 [4:25:42<19:50:33, 1.34s/it] 14%|█▍ | 8725/61904 [4:25:43<20:03:31, 1.36s/it] 14%|█▍ | 8726/61904 [4:25:45<20:05:01, 1.36s/it] 14%|█▍ | 8727/61904 [4:25:46<20:23:49, 1.38s/it] 14%|█▍ | 8728/61904 [4:25:47<20:05:15, 1.36s/it] 14%|█▍ | 8729/61904 [4:25:49<19:28:48, 1.32s/it] 14%|█▍ | 8730/61904 [4:25:50<19:05:32, 1.29s/it] 14%|█▍ | 8731/61904 [4:25:51<19:03:17, 1.29s/it] 14%|█▍ | 8732/61904 [4:25:52<19:17:43, 1.31s/it] 14%|█▍ | 8733/61904 [4:25:54<19:25:24, 1.32s/it] 14%|█▍ | 8734/61904 [4:25:55<19:17:52, 1.31s/it] 14%|█▍ | 8735/61904 [4:25:57<20:11:49, 1.37s/it] 14%|█▍ | 8736/61904 [4:25:58<20:08:31, 1.36s/it] 14%|█▍ | 8737/61904 [4:25:59<19:57:34, 1.35s/it] 14%|█▍ | 8738/61904 [4:26:01<20:00:42, 1.36s/it] 14%|█▍ | 8739/61904 [4:26:02<19:28:38, 1.32s/it] 14%|█▍ | 8740/61904 [4:26:03<19:58:46, 1.35s/it] {'loss': 2.8311, 'learning_rate': 1.861597303254246e-07, 'epoch': 2.26} + 14%|█▍ | 8740/61904 [4:26:03<19:58:46, 1.35s/it] 14%|█▍ | 8741/61904 [4:26:05<19:50:05, 1.34s/it] 14%|█▍ | 8742/61904 [4:26:06<20:14:26, 1.37s/it] 14%|█▍ | 8743/61904 [4:26:07<19:58:04, 1.35s/it] 14%|█▍ | 8744/61904 [4:26:09<20:07:32, 1.36s/it] 14%|█▍ | 8745/61904 [4:26:10<19:40:48, 1.33s/it] 14%|█▍ | 8746/61904 [4:26:11<19:40:04, 1.33s/it] 14%|█▍ | 8747/61904 [4:26:13<19:46:32, 1.34s/it] 14%|█▍ | 8748/61904 [4:26:14<19:30:02, 1.32s/it] 14%|█▍ | 8749/61904 [4:26:16<20:39:43, 1.40s/it] 14%|█▍ | 8750/61904 [4:26:17<20:35:55, 1.40s/it] 14%|█▍ | 8751/61904 [4:26:18<20:20:23, 1.38s/it] 14%|█▍ | 8752/61904 [4:26:20<20:23:38, 1.38s/it] 14%|█▍ | 8753/61904 [4:26:21<21:12:01, 1.44s/it] 14%|█▍ | 8754/61904 [4:26:23<21:50:40, 1.48s/it] 14%|█▍ | 8755/61904 [4:26:24<20:58:56, 1.42s/it] 14%|█▍ | 8756/61904 [4:26:25<20:13:44, 1.37s/it] 14%|█▍ | 8757/61904 [4:26:27<20:58:39, 1.42s/it] 14%|█▍ | 8758/61904 [4:26:28<20:08:31, 1.36s/it] 14%|█▍ | 8759/61904 [4:26:30<20:22:43, 1.38s/it] 14%|█▍ | 8760/61904 [4:26:31<20:27:50, 1.39s/it] {'loss': 2.8857, 'learning_rate': 1.8612731751588226e-07, 'epoch': 2.26} + 14%|█▍ | 8760/61904 [4:26:31<20:27:50, 1.39s/it] 14%|█▍ | 8761/61904 [4:26:32<20:12:01, 1.37s/it] 14%|█▍ | 8762/61904 [4:26:34<19:59:02, 1.35s/it] 14%|█▍ | 8763/61904 [4:26:35<20:08:47, 1.36s/it] 14%|█▍ | 8764/61904 [4:26:36<19:43:39, 1.34s/it] 14%|█▍ | 8765/61904 [4:26:38<19:35:52, 1.33s/it] 14%|█▍ | 8766/61904 [4:26:39<19:41:15, 1.33s/it] 14%|█▍ | 8767/61904 [4:26:40<19:44:53, 1.34s/it] 14%|█▍ | 8768/61904 [4:26:42<19:51:59, 1.35s/it] 14%|█▍ | 8769/61904 [4:26:43<20:37:51, 1.40s/it] 14%|█▍ | 8770/61904 [4:26:44<20:19:04, 1.38s/it] 14%|█▍ | 8771/61904 [4:26:46<20:04:04, 1.36s/it] 14%|█▍ | 8772/61904 [4:26:47<19:46:26, 1.34s/it] 14%|█▍ | 8773/61904 [4:26:48<19:53:49, 1.35s/it] 14%|█▍ | 8774/61904 [4:26:50<21:07:30, 1.43s/it] 14%|█▍ | 8775/61904 [4:26:51<21:00:39, 1.42s/it] 14%|█▍ | 8776/61904 [4:26:53<21:14:02, 1.44s/it] 14%|█▍ | 8777/61904 [4:26:54<20:47:29, 1.41s/it] 14%|█▍ | 8778/61904 [4:26:56<20:15:05, 1.37s/it] 14%|█▍ | 8779/61904 [4:26:57<19:43:37, 1.34s/it] 14%|█▍ | 8780/61904 [4:26:58<20:08:32, 1.36s/it] {'loss': 2.8278, 'learning_rate': 1.8609490470633992e-07, 'epoch': 2.27} + 14%|█▍ | 8780/61904 [4:26:58<20:08:32, 1.36s/it] 14%|█▍ | 8781/61904 [4:27:00<19:59:52, 1.36s/it] 14%|█▍ | 8782/61904 [4:27:01<20:02:11, 1.36s/it] 14%|█▍ | 8783/61904 [4:27:02<19:54:28, 1.35s/it] 14%|█▍ | 8784/61904 [4:27:04<19:58:35, 1.35s/it] 14%|█▍ | 8785/61904 [4:27:05<19:46:01, 1.34s/it] 14%|█▍ | 8786/61904 [4:27:06<20:28:34, 1.39s/it] 14%|█▍ | 8787/61904 [4:27:08<20:14:30, 1.37s/it] 14%|█▍ | 8788/61904 [4:27:09<20:13:52, 1.37s/it] 14%|█▍ | 8789/61904 [4:27:11<20:27:29, 1.39s/it] 14%|█▍ | 8790/61904 [4:27:12<19:50:34, 1.34s/it] 14%|█▍ | 8791/61904 [4:27:13<19:54:26, 1.35s/it] 14%|█▍ | 8792/61904 [4:27:15<20:11:51, 1.37s/it] 14%|█▍ | 8793/61904 [4:27:16<19:43:44, 1.34s/it] 14%|█▍ | 8794/61904 [4:27:17<20:33:35, 1.39s/it] 14%|█▍ | 8795/61904 [4:27:19<20:30:30, 1.39s/it] 14%|█▍ | 8796/61904 [4:27:20<20:20:38, 1.38s/it] 14%|█▍ | 8797/61904 [4:27:22<20:29:16, 1.39s/it] 14%|█▍ | 8798/61904 [4:27:23<20:30:22, 1.39s/it] 14%|█▍ | 8799/61904 [4:27:24<20:54:53, 1.42s/it] 14%|█▍ | 8800/61904 [4:27:26<21:11:56, 1.44s/it] {'loss': 2.8549, 'learning_rate': 1.860624918967976e-07, 'epoch': 2.27} + 14%|█▍ | 8800/61904 [4:27:26<21:11:56, 1.44s/it] 14%|█▍ | 8801/61904 [4:27:27<20:36:18, 1.40s/it] 14%|█▍ | 8802/61904 [4:27:29<21:08:52, 1.43s/it] 14%|█▍ | 8803/61904 [4:27:30<20:10:30, 1.37s/it] 14%|█▍ | 8804/61904 [4:27:31<19:50:13, 1.34s/it] 14%|█▍ | 8805/61904 [4:27:33<20:18:03, 1.38s/it] 14%|█▍ | 8806/61904 [4:27:34<19:56:55, 1.35s/it] 14%|█▍ | 8807/61904 [4:27:35<20:28:15, 1.39s/it] 14%|█▍ | 8808/61904 [4:27:37<20:11:56, 1.37s/it] 14%|█▍ | 8809/61904 [4:27:38<20:31:34, 1.39s/it] 14%|█▍ | 8810/61904 [4:27:39<20:08:21, 1.37s/it] 14%|█▍ | 8811/61904 [4:27:41<20:48:27, 1.41s/it] 14%|█▍ | 8812/61904 [4:27:42<20:11:58, 1.37s/it] 14%|█▍ | 8813/61904 [4:27:44<19:53:52, 1.35s/it] 14%|█▍ | 8814/61904 [4:27:45<20:18:46, 1.38s/it] 14%|█▍ | 8815/61904 [4:27:46<20:10:45, 1.37s/it] 14%|█▍ | 8816/61904 [4:27:48<20:12:21, 1.37s/it] 14%|█▍ | 8817/61904 [4:27:49<19:36:16, 1.33s/it] 14%|█▍ | 8818/61904 [4:27:50<20:14:54, 1.37s/it] 14%|█▍ | 8819/61904 [4:27:52<20:26:28, 1.39s/it] 14%|█▍ | 8820/61904 [4:27:53<19:59:38, 1.36s/it] {'loss': 2.8433, 'learning_rate': 1.8603007908725527e-07, 'epoch': 2.28} + 14%|█▍ | 8820/61904 [4:27:53<19:59:38, 1.36s/it] 14%|█▍ | 8821/61904 [4:27:55<21:22:28, 1.45s/it] 14%|█▍ | 8822/61904 [4:27:56<20:38:39, 1.40s/it] 14%|█▍ | 8823/61904 [4:27:57<20:18:29, 1.38s/it] 14%|█▍ | 8824/61904 [4:27:59<20:12:33, 1.37s/it] 14%|█▍ | 8825/61904 [4:28:00<20:31:52, 1.39s/it] 14%|█▍ | 8826/61904 [4:28:02<20:08:29, 1.37s/it] 14%|█▍ | 8827/61904 [4:28:03<20:36:00, 1.40s/it] 14%|█▍ | 8828/61904 [4:28:04<19:48:12, 1.34s/it] 14%|█▍ | 8829/61904 [4:28:06<20:07:57, 1.37s/it] 14%|█▍ | 8830/61904 [4:28:07<20:29:17, 1.39s/it] 14%|█▍ | 8831/61904 [4:28:09<20:39:45, 1.40s/it] 14%|█▍ | 8832/61904 [4:28:10<20:27:36, 1.39s/it] 14%|█▍ | 8833/61904 [4:28:11<20:17:21, 1.38s/it] 14%|█▍ | 8834/61904 [4:28:13<20:12:24, 1.37s/it] 14%|█▍ | 8835/61904 [4:28:14<20:04:34, 1.36s/it] 14%|█▍ | 8836/61904 [4:28:15<19:56:24, 1.35s/it] 14%|█▍ | 8837/61904 [4:28:17<20:14:54, 1.37s/it] 14%|█▍ | 8838/61904 [4:28:18<19:31:03, 1.32s/it] 14%|█▍ | 8839/61904 [4:28:19<20:14:36, 1.37s/it] 14%|█▍ | 8840/61904 [4:28:21<20:11:30, 1.37s/it] {'loss': 2.8928, 'learning_rate': 1.8599766627771294e-07, 'epoch': 2.28} + 14%|█▍ | 8840/61904 [4:28:21<20:11:30, 1.37s/it] 14%|█▍ | 8841/61904 [4:28:22<20:03:43, 1.36s/it] 14%|█▍ | 8842/61904 [4:28:23<19:46:39, 1.34s/it] 14%|█▍ | 8843/61904 [4:28:25<20:33:16, 1.39s/it] 14%|█▍ | 8844/61904 [4:28:26<20:31:51, 1.39s/it] 14%|█▍ | 8845/61904 [4:28:28<20:49:34, 1.41s/it] 14%|█▍ | 8846/61904 [4:28:29<20:37:29, 1.40s/it] 14%|█▍ | 8847/61904 [4:28:30<20:07:52, 1.37s/it] 14%|█▍ | 8848/61904 [4:28:32<19:43:46, 1.34s/it] 14%|█▍ | 8849/61904 [4:28:33<19:52:20, 1.35s/it] 14%|█▍ | 8850/61904 [4:28:34<20:04:20, 1.36s/it] 14%|█▍ | 8851/61904 [4:28:36<20:08:12, 1.37s/it] 14%|█▍ | 8852/61904 [4:28:37<19:41:13, 1.34s/it] 14%|█▍ | 8853/61904 [4:28:38<20:00:25, 1.36s/it] 14%|█▍ | 8854/61904 [4:28:40<19:37:49, 1.33s/it] 14%|█▍ | 8855/61904 [4:28:41<19:56:09, 1.35s/it] 14%|█▍ | 8856/61904 [4:28:42<19:36:49, 1.33s/it] 14%|█▍ | 8857/61904 [4:28:44<19:54:07, 1.35s/it] 14%|█▍ | 8858/61904 [4:28:45<19:44:57, 1.34s/it] 14%|█▍ | 8859/61904 [4:28:47<19:48:27, 1.34s/it] 14%|█▍ | 8860/61904 [4:28:48<19:39:39, 1.33s/it] {'loss': 2.909, 'learning_rate': 1.8596525346817062e-07, 'epoch': 2.29} + 14%|█▍ | 8860/61904 [4:28:48<19:39:39, 1.33s/it] 14%|█▍ | 8861/61904 [4:28:49<19:28:56, 1.32s/it] 14%|█▍ | 8862/61904 [4:28:51<19:50:22, 1.35s/it] 14%|█▍ | 8863/61904 [4:28:52<19:36:11, 1.33s/it] 14%|█▍ | 8864/61904 [4:28:53<20:48:18, 1.41s/it] 14%|█▍ | 8865/61904 [4:28:55<20:42:14, 1.41s/it] 14%|█▍ | 8866/61904 [4:28:56<20:06:12, 1.36s/it] 14%|█▍ | 8867/61904 [4:28:57<19:47:05, 1.34s/it] 14%|█▍ | 8868/61904 [4:28:59<20:22:55, 1.38s/it] 14%|█▍ | 8869/61904 [4:29:00<20:02:43, 1.36s/it] 14%|█▍ | 8870/61904 [4:29:01<19:27:47, 1.32s/it] 14%|█▍ | 8871/61904 [4:29:03<19:39:27, 1.33s/it] 14%|█▍ | 8872/61904 [4:29:04<20:07:56, 1.37s/it] 14%|█▍ | 8873/61904 [4:29:06<20:11:49, 1.37s/it] 14%|█▍ | 8874/61904 [4:29:07<20:35:13, 1.40s/it] 14%|█▍ | 8875/61904 [4:29:08<20:00:43, 1.36s/it] 14%|█▍ | 8876/61904 [4:29:10<21:19:40, 1.45s/it] 14%|█▍ | 8877/61904 [4:29:11<20:38:21, 1.40s/it] 14%|█▍ | 8878/61904 [4:29:13<20:32:46, 1.39s/it] 14%|█▍ | 8879/61904 [4:29:14<19:32:08, 1.33s/it] 14%|█▍ | 8880/61904 [4:29:15<19:49:54, 1.35s/it] {'loss': 2.8203, 'learning_rate': 1.8593284065862829e-07, 'epoch': 2.29} + 14%|█▍ | 8880/61904 [4:29:15<19:49:54, 1.35s/it] 14%|█▍ | 8881/61904 [4:29:17<20:10:52, 1.37s/it] 14%|█▍ | 8882/61904 [4:29:18<20:24:15, 1.39s/it] 14%|█▍ | 8883/61904 [4:29:19<20:34:42, 1.40s/it] 14%|█▍ | 8884/61904 [4:29:21<21:11:59, 1.44s/it] 14%|█▍ | 8885/61904 [4:29:22<20:56:38, 1.42s/it] 14%|█▍ | 8886/61904 [4:29:24<21:20:45, 1.45s/it] 14%|█▍ | 8887/61904 [4:29:25<21:16:53, 1.45s/it] 14%|█▍ | 8888/61904 [4:29:27<21:38:57, 1.47s/it] 14%|█▍ | 8889/61904 [4:29:28<20:50:39, 1.42s/it] 14%|█▍ | 8890/61904 [4:29:29<19:51:12, 1.35s/it] 14%|█▍ | 8891/61904 [4:29:31<19:51:13, 1.35s/it] 14%|█▍ | 8892/61904 [4:29:32<19:45:42, 1.34s/it] 14%|█▍ | 8893/61904 [4:29:33<19:48:14, 1.34s/it] 14%|█▍ | 8894/61904 [4:29:35<20:31:46, 1.39s/it] 14%|█▍ | 8895/61904 [4:29:36<20:22:14, 1.38s/it] 14%|█▍ | 8896/61904 [4:29:38<20:20:52, 1.38s/it] 14%|█▍ | 8897/61904 [4:29:39<20:07:59, 1.37s/it] 14%|█▍ | 8898/61904 [4:29:40<19:50:01, 1.35s/it] 14%|█▍ | 8899/61904 [4:29:42<19:36:54, 1.33s/it] 14%|█▍ | 8900/61904 [4:29:43<19:37:05, 1.33s/it] {'loss': 2.8176, 'learning_rate': 1.8590042784908595e-07, 'epoch': 2.3} + 14%|█▍ | 8900/61904 [4:29:43<19:37:05, 1.33s/it] 14%|█▍ | 8901/61904 [4:29:44<19:02:40, 1.29s/it] 14%|█▍ | 8902/61904 [4:29:45<19:22:20, 1.32s/it] 14%|█▍ | 8903/61904 [4:29:47<19:39:25, 1.34s/it] 14%|█▍ | 8904/61904 [4:29:48<19:56:55, 1.36s/it] 14%|█▍ | 8905/61904 [4:29:50<20:01:25, 1.36s/it] 14%|█▍ | 8906/61904 [4:29:51<19:42:49, 1.34s/it] 14%|█▍ | 8907/61904 [4:29:52<19:20:00, 1.31s/it] 14%|█▍ | 8908/61904 [4:29:53<18:49:24, 1.28s/it] 14%|█▍ | 8909/61904 [4:29:55<19:18:41, 1.31s/it] 14%|█▍ | 8910/61904 [4:29:56<19:04:28, 1.30s/it] 14%|█▍ | 8911/61904 [4:29:57<19:12:37, 1.31s/it] 14%|█▍ | 8912/61904 [4:29:59<19:24:18, 1.32s/it] 14%|█▍ | 8913/61904 [4:30:00<20:02:00, 1.36s/it] 14%|█▍ | 8914/61904 [4:30:01<19:45:22, 1.34s/it] 14%|█▍ | 8915/61904 [4:30:03<19:37:51, 1.33s/it] 14%|█▍ | 8916/61904 [4:30:04<19:35:43, 1.33s/it] 14%|█▍ | 8917/61904 [4:30:06<20:15:56, 1.38s/it] 14%|█▍ | 8918/61904 [4:30:07<20:11:06, 1.37s/it] 14%|█▍ | 8919/61904 [4:30:08<19:45:28, 1.34s/it] 14%|█▍ | 8920/61904 [4:30:09<19:20:16, 1.31s/it] {'loss': 2.8396, 'learning_rate': 1.858680150395436e-07, 'epoch': 2.31} + 14%|█▍ | 8920/61904 [4:30:09<19:20:16, 1.31s/it] 14%|█▍ | 8921/61904 [4:30:11<19:18:09, 1.31s/it] 14%|█▍ | 8922/61904 [4:30:12<19:02:03, 1.29s/it] 14%|█▍ | 8923/61904 [4:30:14<21:11:01, 1.44s/it] 14%|█▍ | 8924/61904 [4:30:15<21:03:46, 1.43s/it] 14%|█▍ | 8925/61904 [4:30:17<21:08:58, 1.44s/it] 14%|█▍ | 8926/61904 [4:30:18<21:11:40, 1.44s/it] 14%|█▍ | 8927/61904 [4:30:19<20:37:57, 1.40s/it] 14%|█▍ | 8928/61904 [4:30:21<20:40:51, 1.41s/it] 14%|█▍ | 8929/61904 [4:30:22<20:42:42, 1.41s/it] 14%|█▍ | 8930/61904 [4:30:24<20:16:39, 1.38s/it] 14%|█▍ | 8931/61904 [4:30:25<20:10:23, 1.37s/it] 14%|█▍ | 8932/61904 [4:30:26<19:45:38, 1.34s/it] 14%|█▍ | 8933/61904 [4:30:27<19:45:30, 1.34s/it] 14%|█▍ | 8934/61904 [4:30:29<19:13:42, 1.31s/it] 14%|█▍ | 8935/61904 [4:30:30<20:04:14, 1.36s/it] 14%|█▍ | 8936/61904 [4:30:32<20:44:50, 1.41s/it] 14%|█▍ | 8937/61904 [4:30:33<20:23:44, 1.39s/it] 14%|█▍ | 8938/61904 [4:30:34<20:10:20, 1.37s/it] 14%|█▍ | 8939/61904 [4:30:36<20:29:07, 1.39s/it] 14%|█▍ | 8940/61904 [4:30:37<20:14:51, 1.38s/it] {'loss': 2.8307, 'learning_rate': 1.8583560223000127e-07, 'epoch': 2.31} + 14%|█▍ | 8940/61904 [4:30:37<20:14:51, 1.38s/it] 14%|█▍ | 8941/61904 [4:30:39<20:12:31, 1.37s/it] 14%|█▍ | 8942/61904 [4:30:40<20:16:15, 1.38s/it] 14%|█▍ | 8943/61904 [4:30:41<20:11:48, 1.37s/it] 14%|█▍ | 8944/61904 [4:30:43<19:52:15, 1.35s/it] 14%|█▍ | 8945/61904 [4:30:44<20:05:50, 1.37s/it] 14%|█▍ | 8946/61904 [4:30:45<19:47:47, 1.35s/it] 14%|█▍ | 8947/61904 [4:30:47<19:49:13, 1.35s/it] 14%|█▍ | 8948/61904 [4:30:48<19:58:10, 1.36s/it] 14%|█▍ | 8949/61904 [4:30:50<20:41:35, 1.41s/it] 14%|█▍ | 8950/61904 [4:30:51<20:10:39, 1.37s/it] 14%|█▍ | 8951/61904 [4:30:52<20:01:07, 1.36s/it] 14%|█▍ | 8952/61904 [4:30:54<19:59:39, 1.36s/it] 14%|█▍ | 8953/61904 [4:30:55<20:01:22, 1.36s/it] 14%|█▍ | 8954/61904 [4:30:56<19:44:44, 1.34s/it] 14%|█▍ | 8955/61904 [4:30:58<19:49:36, 1.35s/it] 14%|█▍ | 8956/61904 [4:30:59<19:40:59, 1.34s/it] 14%|█▍ | 8957/61904 [4:31:00<19:09:38, 1.30s/it] 14%|█▍ | 8958/61904 [4:31:01<18:47:52, 1.28s/it] 14%|█▍ | 8959/61904 [4:31:03<19:18:30, 1.31s/it] 14%|█▍ | 8960/61904 [4:31:04<19:40:16, 1.34s/it] {'loss': 2.895, 'learning_rate': 1.8580318942045896e-07, 'epoch': 2.32} + 14%|█▍ | 8960/61904 [4:31:04<19:40:16, 1.34s/it] 14%|█▍ | 8961/61904 [4:31:06<20:00:51, 1.36s/it] 14%|█▍ | 8962/61904 [4:31:07<19:46:45, 1.34s/it] 14%|█▍ | 8963/61904 [4:31:08<19:24:08, 1.32s/it] 14%|█▍ | 8964/61904 [4:31:09<19:28:50, 1.32s/it] 14%|█▍ | 8965/61904 [4:31:11<19:06:35, 1.30s/it] 14%|█▍ | 8966/61904 [4:31:12<19:38:29, 1.34s/it] 14%|█▍ | 8967/61904 [4:31:13<19:32:50, 1.33s/it] 14%|█▍ | 8968/61904 [4:31:15<20:17:00, 1.38s/it] 14%|█▍ | 8969/61904 [4:31:16<19:27:20, 1.32s/it] 14%|█▍ | 8970/61904 [4:31:17<19:26:24, 1.32s/it] 14%|█▍ | 8971/61904 [4:31:19<19:10:14, 1.30s/it] 14%|█▍ | 8972/61904 [4:31:20<19:03:53, 1.30s/it] 14%|█▍ | 8973/61904 [4:31:21<18:49:56, 1.28s/it] 14%|█▍ | 8974/61904 [4:31:23<19:30:32, 1.33s/it] 14%|█▍ | 8975/61904 [4:31:24<19:29:56, 1.33s/it] 14%|█▍ | 8976/61904 [4:31:25<20:06:44, 1.37s/it] 15%|█▍ | 8977/61904 [4:31:27<21:27:51, 1.46s/it] 15%|█▍ | 8978/61904 [4:31:29<21:24:19, 1.46s/it] 15%|█▍ | 8979/61904 [4:31:30<21:11:22, 1.44s/it] 15%|█▍ | 8980/61904 [4:31:31<20:53:06, 1.42s/it] {'loss': 2.8683, 'learning_rate': 1.8577077661091662e-07, 'epoch': 2.32} + 15%|█▍ | 8980/61904 [4:31:31<20:53:06, 1.42s/it] 15%|█▍ | 8981/61904 [4:31:33<20:41:51, 1.41s/it] 15%|█▍ | 8982/61904 [4:31:34<20:17:30, 1.38s/it] 15%|█▍ | 8983/61904 [4:31:35<20:33:21, 1.40s/it] 15%|█▍ | 8984/61904 [4:31:37<20:13:25, 1.38s/it] 15%|█▍ | 8985/61904 [4:31:38<19:49:21, 1.35s/it] 15%|█▍ | 8986/61904 [4:31:39<19:42:01, 1.34s/it] 15%|█▍ | 8987/61904 [4:31:41<19:30:38, 1.33s/it] 15%|█▍ | 8988/61904 [4:31:42<19:35:28, 1.33s/it] 15%|█▍ | 8989/61904 [4:31:43<19:22:55, 1.32s/it] 15%|█▍ | 8990/61904 [4:31:45<19:37:14, 1.33s/it] 15%|█▍ | 8991/61904 [4:31:46<19:32:56, 1.33s/it] 15%|█▍ | 8992/61904 [4:31:47<19:55:37, 1.36s/it] 15%|█▍ | 8993/61904 [4:31:49<19:45:24, 1.34s/it] 15%|█▍ | 8994/61904 [4:31:50<20:07:33, 1.37s/it] 15%|█▍ | 8995/61904 [4:31:52<20:08:53, 1.37s/it] 15%|█▍ | 8996/61904 [4:31:53<20:15:18, 1.38s/it] 15%|█▍ | 8997/61904 [4:31:54<20:13:59, 1.38s/it] 15%|█▍ | 8998/61904 [4:31:56<20:46:33, 1.41s/it] 15%|█▍ | 8999/61904 [4:31:57<20:27:49, 1.39s/it] 15%|█▍ | 9000/61904 [4:31:59<20:47:36, 1.41s/it] {'loss': 2.8272, 'learning_rate': 1.8573836380137428e-07, 'epoch': 2.33} + 15%|█▍ | 9000/61904 [4:31:59<20:47:36, 1.41s/it] 15%|█▍ | 9001/61904 [4:32:00<20:41:03, 1.41s/it] 15%|█▍ | 9002/61904 [4:32:01<20:13:47, 1.38s/it] 15%|█▍ | 9003/61904 [4:32:03<20:28:07, 1.39s/it] 15%|█▍ | 9004/61904 [4:32:04<20:07:21, 1.37s/it] 15%|█▍ | 9005/61904 [4:32:05<20:06:39, 1.37s/it] 15%|█▍ | 9006/61904 [4:32:07<19:38:32, 1.34s/it] 15%|█▍ | 9007/61904 [4:32:08<19:28:42, 1.33s/it] 15%|█▍ | 9008/61904 [4:32:09<19:59:59, 1.36s/it] 15%|█▍ | 9009/61904 [4:32:11<19:59:40, 1.36s/it] 15%|█▍ | 9010/61904 [4:32:12<20:13:02, 1.38s/it] 15%|█▍ | 9011/61904 [4:32:14<20:30:36, 1.40s/it] 15%|█▍ | 9012/61904 [4:32:15<20:04:03, 1.37s/it] 15%|█▍ | 9013/61904 [4:32:16<20:02:49, 1.36s/it] 15%|█▍ | 9014/61904 [4:32:18<19:34:00, 1.33s/it] 15%|█▍ | 9015/61904 [4:32:19<20:10:24, 1.37s/it] 15%|█▍ | 9016/61904 [4:32:20<19:47:21, 1.35s/it] 15%|█▍ | 9017/61904 [4:32:22<19:40:14, 1.34s/it] 15%|█▍ | 9018/61904 [4:32:23<19:57:33, 1.36s/it] 15%|█▍ | 9019/61904 [4:32:24<20:29:06, 1.39s/it] 15%|█▍ | 9020/61904 [4:32:26<20:35:22, 1.40s/it] {'loss': 2.8701, 'learning_rate': 1.8570595099183197e-07, 'epoch': 2.33} + 15%|█▍ | 9020/61904 [4:32:26<20:35:22, 1.40s/it] 15%|█▍ | 9021/61904 [4:32:27<20:08:38, 1.37s/it] 15%|█▍ | 9022/61904 [4:32:28<19:45:20, 1.34s/it] 15%|█▍ | 9023/61904 [4:32:30<19:58:17, 1.36s/it] 15%|█▍ | 9024/61904 [4:32:31<19:16:34, 1.31s/it] 15%|█▍ | 9025/61904 [4:32:33<20:17:19, 1.38s/it] 15%|█▍ | 9026/61904 [4:32:34<20:30:19, 1.40s/it] 15%|█▍ | 9027/61904 [4:32:36<21:00:36, 1.43s/it] 15%|█▍ | 9028/61904 [4:32:37<21:20:55, 1.45s/it] 15%|█▍ | 9029/61904 [4:32:38<20:19:53, 1.38s/it] 15%|█▍ | 9030/61904 [4:32:40<20:13:44, 1.38s/it] 15%|█▍ | 9031/61904 [4:32:41<20:00:54, 1.36s/it] 15%|█▍ | 9032/61904 [4:32:42<19:57:33, 1.36s/it] 15%|█▍ | 9033/61904 [4:32:44<20:13:04, 1.38s/it] 15%|█▍ | 9034/61904 [4:32:45<20:30:53, 1.40s/it] 15%|█▍ | 9035/61904 [4:32:47<20:32:08, 1.40s/it] 15%|█▍ | 9036/61904 [4:32:48<20:41:09, 1.41s/it] 15%|█▍ | 9037/61904 [4:32:49<20:37:17, 1.40s/it] 15%|█▍ | 9038/61904 [4:32:51<20:27:50, 1.39s/it] 15%|█▍ | 9039/61904 [4:32:52<19:57:07, 1.36s/it] 15%|█▍ | 9040/61904 [4:32:53<20:05:15, 1.37s/it] {'loss': 2.9526, 'learning_rate': 1.8567353818228963e-07, 'epoch': 2.34} + 15%|█▍ | 9040/61904 [4:32:53<20:05:15, 1.37s/it] 15%|█▍ | 9041/61904 [4:32:55<20:06:44, 1.37s/it] 15%|█▍ | 9042/61904 [4:32:56<19:59:35, 1.36s/it] 15%|█▍ | 9043/61904 [4:32:58<19:51:40, 1.35s/it] 15%|█▍ | 9044/61904 [4:32:59<20:03:29, 1.37s/it] 15%|█▍ | 9045/61904 [4:33:00<19:53:09, 1.35s/it] 15%|█▍ | 9046/61904 [4:33:02<19:41:24, 1.34s/it] 15%|█▍ | 9047/61904 [4:33:03<19:34:33, 1.33s/it] 15%|█▍ | 9048/61904 [4:33:04<20:13:13, 1.38s/it] 15%|█▍ | 9049/61904 [4:33:06<21:20:50, 1.45s/it] 15%|█▍ | 9050/61904 [4:33:07<20:54:30, 1.42s/it] 15%|█▍ | 9051/61904 [4:33:09<20:19:13, 1.38s/it] 15%|█▍ | 9052/61904 [4:33:10<19:55:18, 1.36s/it] 15%|█▍ | 9053/61904 [4:33:11<20:18:54, 1.38s/it] 15%|█▍ | 9054/61904 [4:33:13<20:02:04, 1.36s/it] 15%|█▍ | 9055/61904 [4:33:14<19:56:47, 1.36s/it] 15%|█▍ | 9056/61904 [4:33:15<20:11:00, 1.37s/it] 15%|█▍ | 9057/61904 [4:33:17<20:34:54, 1.40s/it] 15%|█▍ | 9058/61904 [4:33:18<20:29:54, 1.40s/it] 15%|█▍ | 9059/61904 [4:33:20<20:34:51, 1.40s/it] 15%|█▍ | 9060/61904 [4:33:21<20:16:11, 1.38s/it] {'loss': 2.8748, 'learning_rate': 1.856411253727473e-07, 'epoch': 2.34} + 15%|█▍ | 9060/61904 [4:33:21<20:16:11, 1.38s/it] 15%|█▍ | 9061/61904 [4:33:22<19:39:32, 1.34s/it] 15%|█▍ | 9062/61904 [4:33:24<19:29:55, 1.33s/it] 15%|█▍ | 9063/61904 [4:33:25<19:26:09, 1.32s/it] 15%|█▍ | 9064/61904 [4:33:26<19:42:17, 1.34s/it] 15%|█▍ | 9065/61904 [4:33:28<19:59:01, 1.36s/it] 15%|█▍ | 9066/61904 [4:33:29<19:59:21, 1.36s/it] 15%|█▍ | 9067/61904 [4:33:30<19:59:51, 1.36s/it] 15%|█▍ | 9068/61904 [4:33:32<19:29:47, 1.33s/it] 15%|█▍ | 9069/61904 [4:33:33<19:44:51, 1.35s/it] 15%|█▍ | 9070/61904 [4:33:34<20:06:34, 1.37s/it] 15%|█▍ | 9071/61904 [4:33:36<20:38:46, 1.41s/it] 15%|█▍ | 9072/61904 [4:33:37<19:52:09, 1.35s/it] 15%|█▍ | 9073/61904 [4:33:39<19:39:54, 1.34s/it] 15%|█▍ | 9074/61904 [4:33:40<19:42:33, 1.34s/it] 15%|█▍ | 9075/61904 [4:33:41<19:46:46, 1.35s/it] 15%|█▍ | 9076/61904 [4:33:43<19:33:29, 1.33s/it] 15%|█▍ | 9077/61904 [4:33:44<19:29:40, 1.33s/it] 15%|█▍ | 9078/61904 [4:33:45<20:00:38, 1.36s/it] 15%|█▍ | 9079/61904 [4:33:47<19:55:14, 1.36s/it] 15%|█▍ | 9080/61904 [4:33:48<20:07:01, 1.37s/it] {'loss': 2.8858, 'learning_rate': 1.8560871256320498e-07, 'epoch': 2.35} + 15%|█▍ | 9080/61904 [4:33:48<20:07:01, 1.37s/it] 15%|█▍ | 9081/61904 [4:33:50<20:38:05, 1.41s/it] 15%|█▍ | 9082/61904 [4:33:51<21:07:48, 1.44s/it] 15%|█▍ | 9083/61904 [4:33:52<20:37:42, 1.41s/it] 15%|█▍ | 9084/61904 [4:33:54<20:42:36, 1.41s/it] 15%|█▍ | 9085/61904 [4:33:55<20:55:36, 1.43s/it] 15%|█▍ | 9086/61904 [4:33:57<20:27:35, 1.39s/it] 15%|█▍ | 9087/61904 [4:33:58<19:35:39, 1.34s/it] 15%|█▍ | 9088/61904 [4:33:59<20:00:16, 1.36s/it] 15%|█▍ | 9089/61904 [4:34:01<19:51:33, 1.35s/it] 15%|█▍ | 9090/61904 [4:34:02<19:34:30, 1.33s/it] 15%|█▍ | 9091/61904 [4:34:03<19:23:38, 1.32s/it] 15%|█▍ | 9092/61904 [4:34:04<19:22:58, 1.32s/it] 15%|█▍ | 9093/61904 [4:34:06<19:23:06, 1.32s/it] 15%|█▍ | 9094/61904 [4:34:07<20:05:59, 1.37s/it] 15%|█▍ | 9095/61904 [4:34:09<19:57:41, 1.36s/it] 15%|█▍ | 9096/61904 [4:34:10<19:31:30, 1.33s/it] 15%|█▍ | 9097/61904 [4:34:11<20:23:05, 1.39s/it] 15%|█▍ | 9098/61904 [4:34:13<20:15:21, 1.38s/it] 15%|█▍ | 9099/61904 [4:34:14<19:50:36, 1.35s/it] 15%|█▍ | 9100/61904 [4:34:15<19:38:52, 1.34s/it] {'loss': 2.9003, 'learning_rate': 1.8557629975366262e-07, 'epoch': 2.35} + 15%|█▍ | 9100/61904 [4:34:15<19:38:52, 1.34s/it] 15%|█▍ | 9101/61904 [4:34:17<19:49:06, 1.35s/it] 15%|█▍ | 9102/61904 [4:34:18<19:10:57, 1.31s/it] 15%|█▍ | 9103/61904 [4:34:19<19:50:53, 1.35s/it] 15%|█▍ | 9104/61904 [4:34:21<20:08:11, 1.37s/it] 15%|█▍ | 9105/61904 [4:34:22<19:52:30, 1.36s/it] 15%|█▍ | 9106/61904 [4:34:23<20:01:33, 1.37s/it] 15%|█▍ | 9107/61904 [4:34:25<19:45:21, 1.35s/it] 15%|█▍ | 9108/61904 [4:34:26<19:31:45, 1.33s/it] 15%|█▍ | 9109/61904 [4:34:27<19:14:36, 1.31s/it] 15%|█▍ | 9110/61904 [4:34:29<20:16:45, 1.38s/it] 15%|█▍ | 9111/61904 [4:34:30<20:27:39, 1.40s/it] 15%|█▍ | 9112/61904 [4:34:32<21:00:04, 1.43s/it] 15%|█▍ | 9113/61904 [4:34:33<20:49:52, 1.42s/it] 15%|█▍ | 9114/61904 [4:34:35<20:26:13, 1.39s/it] 15%|█▍ | 9115/61904 [4:34:36<20:39:58, 1.41s/it] 15%|█▍ | 9116/61904 [4:34:38<21:14:31, 1.45s/it] 15%|█▍ | 9117/61904 [4:34:39<21:01:26, 1.43s/it] 15%|█▍ | 9118/61904 [4:34:40<21:15:01, 1.45s/it] 15%|█▍ | 9119/61904 [4:34:42<20:20:57, 1.39s/it] 15%|█▍ | 9120/61904 [4:34:43<20:06:32, 1.37s/it] {'loss': 2.8281, 'learning_rate': 1.855438869441203e-07, 'epoch': 2.36} + 15%|█▍ | 9120/61904 [4:34:43<20:06:32, 1.37s/it] 15%|█▍ | 9121/61904 [4:34:45<20:41:01, 1.41s/it] 15%|█▍ | 9122/61904 [4:34:46<20:27:30, 1.40s/it] 15%|█▍ | 9123/61904 [4:34:47<20:14:43, 1.38s/it] 15%|█▍ | 9124/61904 [4:34:49<20:59:07, 1.43s/it] 15%|█▍ | 9125/61904 [4:34:50<20:26:11, 1.39s/it] 15%|█▍ | 9126/61904 [4:34:51<19:55:30, 1.36s/it] 15%|█▍ | 9127/61904 [4:34:53<19:48:41, 1.35s/it] 15%|█▍ | 9128/61904 [4:34:54<20:06:31, 1.37s/it] 15%|█▍ | 9129/61904 [4:34:55<19:37:52, 1.34s/it] 15%|█▍ | 9130/61904 [4:34:57<19:42:33, 1.34s/it] 15%|█▍ | 9131/61904 [4:34:58<19:42:27, 1.34s/it] 15%|█▍ | 9132/61904 [4:34:59<19:27:54, 1.33s/it] 15%|█▍ | 9133/61904 [4:35:01<20:07:03, 1.37s/it] 15%|█▍ | 9134/61904 [4:35:02<20:34:04, 1.40s/it] 15%|█▍ | 9135/61904 [4:35:04<20:27:12, 1.40s/it] 15%|█▍ | 9136/61904 [4:35:05<20:33:03, 1.40s/it] 15%|█▍ | 9137/61904 [4:35:06<20:08:18, 1.37s/it] 15%|█▍ | 9138/61904 [4:35:08<19:47:02, 1.35s/it] 15%|█▍ | 9139/61904 [4:35:09<19:49:32, 1.35s/it] 15%|█▍ | 9140/61904 [4:35:11<20:29:52, 1.40s/it] {'loss': 2.81, 'learning_rate': 1.8551147413457797e-07, 'epoch': 2.36} + 15%|█▍ | 9140/61904 [4:35:11<20:29:52, 1.40s/it] 15%|█▍ | 9141/61904 [4:35:12<20:38:02, 1.41s/it] 15%|█▍ | 9142/61904 [4:35:13<20:54:04, 1.43s/it] 15%|█▍ | 9143/61904 [4:35:15<20:33:41, 1.40s/it] 15%|█▍ | 9144/61904 [4:35:16<20:06:26, 1.37s/it] 15%|█▍ | 9145/61904 [4:35:17<19:56:33, 1.36s/it] 15%|█▍ | 9146/61904 [4:35:19<19:50:50, 1.35s/it] 15%|█▍ | 9147/61904 [4:35:20<20:41:43, 1.41s/it] 15%|█▍ | 9148/61904 [4:35:22<20:13:23, 1.38s/it] 15%|█▍ | 9149/61904 [4:35:23<19:53:10, 1.36s/it] 15%|█▍ | 9150/61904 [4:35:24<19:48:45, 1.35s/it] 15%|█▍ | 9151/61904 [4:35:26<20:27:37, 1.40s/it] 15%|█▍ | 9152/61904 [4:35:27<20:17:00, 1.38s/it] 15%|█▍ | 9153/61904 [4:35:29<20:11:07, 1.38s/it] 15%|█▍ | 9154/61904 [4:35:30<19:40:36, 1.34s/it] 15%|█▍ | 9155/61904 [4:35:31<19:34:16, 1.34s/it] 15%|█▍ | 9156/61904 [4:35:32<19:39:01, 1.34s/it] 15%|█▍ | 9157/61904 [4:35:34<19:29:20, 1.33s/it] 15%|█▍ | 9158/61904 [4:35:35<19:30:00, 1.33s/it] 15%|█▍ | 9159/61904 [4:35:36<19:47:58, 1.35s/it] 15%|█▍ | 9160/61904 [4:35:38<21:13:12, 1.45s/it] {'loss': 2.8814, 'learning_rate': 1.8547906132503563e-07, 'epoch': 2.37} + 15%|█▍ | 9160/61904 [4:35:38<21:13:12, 1.45s/it] 15%|█▍ | 9161/61904 [4:35:39<20:39:14, 1.41s/it] 15%|█▍ | 9162/61904 [4:35:41<20:37:54, 1.41s/it] 15%|█▍ | 9163/61904 [4:35:42<20:22:06, 1.39s/it] 15%|█▍ | 9164/61904 [4:35:44<19:56:57, 1.36s/it] 15%|█▍ | 9165/61904 [4:35:45<19:53:51, 1.36s/it] 15%|█▍ | 9166/61904 [4:35:46<19:54:28, 1.36s/it] 15%|█▍ | 9167/61904 [4:35:48<19:34:03, 1.34s/it] 15%|█▍ | 9168/61904 [4:35:49<19:47:48, 1.35s/it] 15%|█▍ | 9169/61904 [4:35:50<19:40:51, 1.34s/it] 15%|█▍ | 9170/61904 [4:35:52<20:03:46, 1.37s/it] 15%|█▍ | 9171/61904 [4:35:53<19:36:49, 1.34s/it] 15%|█▍ | 9172/61904 [4:35:54<19:51:35, 1.36s/it] 15%|█▍ | 9173/61904 [4:35:56<20:22:12, 1.39s/it] 15%|█▍ | 9174/61904 [4:35:57<19:37:43, 1.34s/it] 15%|█▍ | 9175/61904 [4:35:58<19:43:00, 1.35s/it] 15%|█▍ | 9176/61904 [4:36:00<19:49:13, 1.35s/it] 15%|█▍ | 9177/61904 [4:36:01<20:08:07, 1.37s/it] 15%|█▍ | 9178/61904 [4:36:03<21:05:06, 1.44s/it] 15%|█▍ | 9179/61904 [4:36:04<21:15:20, 1.45s/it] 15%|█▍ | 9180/61904 [4:36:06<21:03:02, 1.44s/it] {'loss': 2.9104, 'learning_rate': 1.8544664851549332e-07, 'epoch': 2.37} + 15%|█▍ | 9180/61904 [4:36:06<21:03:02, 1.44s/it] 15%|█▍ | 9181/61904 [4:36:07<21:23:36, 1.46s/it] 15%|█▍ | 9182/61904 [4:36:09<20:58:53, 1.43s/it] 15%|█▍ | 9183/61904 [4:36:10<20:29:36, 1.40s/it] 15%|█▍ | 9184/61904 [4:36:11<20:07:08, 1.37s/it] 15%|█▍ | 9185/61904 [4:36:13<20:11:14, 1.38s/it] 15%|█▍ | 9186/61904 [4:36:14<20:09:35, 1.38s/it] 15%|█▍ | 9187/61904 [4:36:15<19:53:26, 1.36s/it] 15%|█▍ | 9188/61904 [4:36:17<19:48:37, 1.35s/it] 15%|█▍ | 9189/61904 [4:36:18<19:41:45, 1.35s/it] 15%|█▍ | 9190/61904 [4:36:19<19:41:51, 1.35s/it] 15%|█▍ | 9191/61904 [4:36:21<19:51:02, 1.36s/it] 15%|█▍ | 9192/61904 [4:36:22<20:23:25, 1.39s/it] 15%|█▍ | 9193/61904 [4:36:23<19:48:45, 1.35s/it] 15%|█▍ | 9194/61904 [4:36:25<19:57:45, 1.36s/it] 15%|█▍ | 9195/61904 [4:36:26<19:31:34, 1.33s/it] 15%|█▍ | 9196/61904 [4:36:27<19:01:22, 1.30s/it] 15%|█▍ | 9197/61904 [4:36:29<19:06:34, 1.31s/it] 15%|█▍ | 9198/61904 [4:36:30<19:12:42, 1.31s/it] 15%|█▍ | 9199/61904 [4:36:31<19:17:00, 1.32s/it] 15%|█▍ | 9200/61904 [4:36:33<19:32:39, 1.34s/it] {'loss': 2.8546, 'learning_rate': 1.8541423570595098e-07, 'epoch': 2.38} + 15%|█▍ | 9200/61904 [4:36:33<19:32:39, 1.34s/it] 15%|█▍ | 9201/61904 [4:36:34<19:43:16, 1.35s/it] 15%|█▍ | 9202/61904 [4:36:35<20:07:39, 1.37s/it] 15%|█▍ | 9203/61904 [4:36:37<19:53:53, 1.36s/it] 15%|█▍ | 9204/61904 [4:36:38<19:45:54, 1.35s/it] 15%|█▍ | 9205/61904 [4:36:39<20:04:47, 1.37s/it] 15%|█▍ | 9206/61904 [4:36:41<20:02:50, 1.37s/it] 15%|█▍ | 9207/61904 [4:36:42<20:07:18, 1.37s/it] 15%|█▍ | 9208/61904 [4:36:44<19:53:19, 1.36s/it] 15%|█▍ | 9209/61904 [4:36:45<20:26:33, 1.40s/it] 15%|█▍ | 9210/61904 [4:36:46<19:55:04, 1.36s/it] 15%|█▍ | 9211/61904 [4:36:48<20:26:16, 1.40s/it] 15%|█▍ | 9212/61904 [4:36:49<19:58:35, 1.36s/it] 15%|█▍ | 9213/61904 [4:36:51<20:21:29, 1.39s/it] 15%|█▍ | 9214/61904 [4:36:52<19:58:07, 1.36s/it] 15%|█▍ | 9215/61904 [4:36:53<20:21:53, 1.39s/it] 15%|█▍ | 9216/61904 [4:36:55<19:46:46, 1.35s/it] 15%|█▍ | 9217/61904 [4:36:56<19:53:53, 1.36s/it] 15%|█▍ | 9218/61904 [4:36:57<19:24:22, 1.33s/it] 15%|█▍ | 9219/61904 [4:36:58<19:15:58, 1.32s/it] 15%|█▍ | 9220/61904 [4:37:00<19:46:48, 1.35s/it] {'loss': 2.8443, 'learning_rate': 1.8538182289640864e-07, 'epoch': 2.38} + 15%|█▍ | 9220/61904 [4:37:00<19:46:48, 1.35s/it] 15%|█▍ | 9221/61904 [4:37:01<19:28:59, 1.33s/it] 15%|█▍ | 9222/61904 [4:37:03<19:29:06, 1.33s/it] 15%|█▍ | 9223/61904 [4:37:04<19:43:08, 1.35s/it] 15%|█▍ | 9224/61904 [4:37:05<19:44:51, 1.35s/it] 15%|█▍ | 9225/61904 [4:37:07<19:58:35, 1.37s/it] 15%|█▍ | 9226/61904 [4:37:08<19:37:45, 1.34s/it] 15%|█▍ | 9227/61904 [4:37:09<19:28:07, 1.33s/it] 15%|█▍ | 9228/61904 [4:37:11<19:35:48, 1.34s/it] 15%|█▍ | 9229/61904 [4:37:12<19:35:33, 1.34s/it] 15%|█▍ | 9230/61904 [4:37:13<19:24:11, 1.33s/it] 15%|█▍ | 9231/61904 [4:37:14<19:02:30, 1.30s/it] 15%|█▍ | 9232/61904 [4:37:16<19:16:11, 1.32s/it] 15%|█▍ | 9233/61904 [4:37:17<20:15:15, 1.38s/it] 15%|█▍ | 9234/61904 [4:37:19<20:31:32, 1.40s/it] 15%|█▍ | 9235/61904 [4:37:20<20:27:45, 1.40s/it] 15%|█▍ | 9236/61904 [4:37:22<20:15:24, 1.38s/it] 15%|█▍ | 9237/61904 [4:37:23<19:52:14, 1.36s/it] 15%|█▍ | 9238/61904 [4:37:24<20:45:50, 1.42s/it] 15%|█▍ | 9239/61904 [4:37:26<20:03:54, 1.37s/it] 15%|█▍ | 9240/61904 [4:37:27<20:26:39, 1.40s/it] {'loss': 2.8323, 'learning_rate': 1.8534941008686633e-07, 'epoch': 2.39} + 15%|█▍ | 9240/61904 [4:37:27<20:26:39, 1.40s/it] 15%|█▍ | 9241/61904 [4:37:29<20:35:30, 1.41s/it] 15%|█▍ | 9242/61904 [4:37:30<20:11:19, 1.38s/it] 15%|█▍ | 9243/61904 [4:37:31<19:58:12, 1.37s/it] 15%|█▍ | 9244/61904 [4:37:33<19:34:08, 1.34s/it] 15%|█▍ | 9245/61904 [4:37:34<19:48:17, 1.35s/it] 15%|█▍ | 9246/61904 [4:37:35<20:12:55, 1.38s/it] 15%|█▍ | 9247/61904 [4:37:37<19:43:41, 1.35s/it] 15%|█▍ | 9248/61904 [4:37:38<19:33:37, 1.34s/it] 15%|█▍ | 9249/61904 [4:37:39<20:06:33, 1.37s/it] 15%|█▍ | 9250/61904 [4:37:41<19:51:48, 1.36s/it] 15%|█▍ | 9251/61904 [4:37:42<19:41:36, 1.35s/it] 15%|█▍ | 9252/61904 [4:37:43<20:00:22, 1.37s/it] 15%|█▍ | 9253/61904 [4:37:45<19:58:42, 1.37s/it] 15%|█▍ | 9254/61904 [4:37:46<19:37:55, 1.34s/it] 15%|█▍ | 9255/61904 [4:37:47<19:44:49, 1.35s/it] 15%|█▍ | 9256/61904 [4:37:49<19:54:04, 1.36s/it] 15%|█▍ | 9257/61904 [4:37:50<20:16:10, 1.39s/it] 15%|█▍ | 9258/61904 [4:37:52<20:00:32, 1.37s/it] 15%|█▍ | 9259/61904 [4:37:53<19:32:25, 1.34s/it] 15%|█▍ | 9260/61904 [4:37:54<19:22:16, 1.32s/it] {'loss': 2.8195, 'learning_rate': 1.85316997277324e-07, 'epoch': 2.39} + 15%|█▍ | 9260/61904 [4:37:54<19:22:16, 1.32s/it] 15%|█▍ | 9261/61904 [4:37:56<20:06:51, 1.38s/it] 15%|█▍ | 9262/61904 [4:37:57<19:33:09, 1.34s/it] 15%|█▍ | 9263/61904 [4:37:58<19:09:51, 1.31s/it] 15%|█▍ | 9264/61904 [4:38:00<19:42:29, 1.35s/it] 15%|█▍ | 9265/61904 [4:38:01<19:19:50, 1.32s/it] 15%|█▍ | 9266/61904 [4:38:02<19:43:57, 1.35s/it] 15%|█▍ | 9267/61904 [4:38:04<20:20:03, 1.39s/it] 15%|█▍ | 9268/61904 [4:38:05<20:02:26, 1.37s/it] 15%|█▍ | 9269/61904 [4:38:06<20:06:41, 1.38s/it] 15%|█▍ | 9270/61904 [4:38:08<20:11:05, 1.38s/it] 15%|█▍ | 9271/61904 [4:38:09<19:47:25, 1.35s/it] 15%|█▍ | 9272/61904 [4:38:11<20:11:31, 1.38s/it] 15%|█▍ | 9273/61904 [4:38:12<20:19:25, 1.39s/it] 15%|█▍ | 9274/61904 [4:38:13<20:19:10, 1.39s/it] 15%|█▍ | 9275/61904 [4:38:15<20:11:52, 1.38s/it] 15%|█▍ | 9276/61904 [4:38:16<20:08:03, 1.38s/it] 15%|█▍ | 9277/61904 [4:38:18<20:22:03, 1.39s/it] 15%|█▍ | 9278/61904 [4:38:19<21:03:11, 1.44s/it] 15%|█▍ | 9279/61904 [4:38:20<20:31:46, 1.40s/it] 15%|█▍ | 9280/61904 [4:38:22<20:43:52, 1.42s/it] {'loss': 2.7998, 'learning_rate': 1.8528458446778166e-07, 'epoch': 2.4} + 15%|█▍ | 9280/61904 [4:38:22<20:43:52, 1.42s/it] 15%|█▍ | 9281/61904 [4:38:23<20:24:04, 1.40s/it] 15%|█▍ | 9282/61904 [4:38:25<20:29:51, 1.40s/it] 15%|█▍ | 9283/61904 [4:38:26<20:20:27, 1.39s/it] 15%|█▍ | 9284/61904 [4:38:27<19:59:26, 1.37s/it] 15%|█▍ | 9285/61904 [4:38:29<20:06:34, 1.38s/it] 15%|█▌ | 9286/61904 [4:38:30<20:01:13, 1.37s/it] 15%|█▌ | 9287/61904 [4:38:32<20:39:36, 1.41s/it] 15%|█▌ | 9288/61904 [4:38:33<20:36:31, 1.41s/it] 15%|█▌ | 9289/61904 [4:38:34<20:35:45, 1.41s/it] 15%|█▌ | 9290/61904 [4:38:36<20:20:10, 1.39s/it] 15%|█▌ | 9291/61904 [4:38:37<20:30:53, 1.40s/it] 15%|█▌ | 9292/61904 [4:38:39<20:11:24, 1.38s/it] 15%|█▌ | 9293/61904 [4:38:40<19:46:15, 1.35s/it] 15%|█▌ | 9294/61904 [4:38:41<19:15:22, 1.32s/it] 15%|█▌ | 9295/61904 [4:38:42<19:18:59, 1.32s/it] 15%|█▌ | 9296/61904 [4:38:44<19:09:51, 1.31s/it] 15%|█▌ | 9297/61904 [4:38:45<19:28:18, 1.33s/it] 15%|█▌ | 9298/61904 [4:38:47<20:03:02, 1.37s/it] 15%|█▌ | 9299/61904 [4:38:48<19:57:33, 1.37s/it] 15%|█▌ | 9300/61904 [4:38:49<19:43:41, 1.35s/it] {'loss': 2.85, 'learning_rate': 1.8525217165823934e-07, 'epoch': 2.4} + 15%|█▌ | 9300/61904 [4:38:49<19:43:41, 1.35s/it] 15%|█▌ | 9301/61904 [4:38:50<19:26:06, 1.33s/it] 15%|█▌ | 9302/61904 [4:38:52<19:20:34, 1.32s/it] 15%|█▌ | 9303/61904 [4:38:53<18:52:43, 1.29s/it] 15%|█▌ | 9304/61904 [4:38:54<18:57:41, 1.30s/it] 15%|█��� | 9305/61904 [4:38:56<20:11:06, 1.38s/it] 15%|█▌ | 9306/61904 [4:38:57<20:05:54, 1.38s/it] 15%|█▌ | 9307/61904 [4:38:59<20:04:29, 1.37s/it] 15%|█▌ | 9308/61904 [4:39:00<19:45:11, 1.35s/it] 15%|█▌ | 9309/61904 [4:39:01<19:25:42, 1.33s/it] 15%|█▌ | 9310/61904 [4:39:02<19:21:03, 1.32s/it] 15%|█▌ | 9311/61904 [4:39:04<19:45:24, 1.35s/it] 15%|█▌ | 9312/61904 [4:39:05<19:55:07, 1.36s/it] 15%|█▌ | 9313/61904 [4:39:07<19:45:50, 1.35s/it] 15%|█▌ | 9314/61904 [4:39:08<20:34:09, 1.41s/it] 15%|█▌ | 9315/61904 [4:39:10<21:00:28, 1.44s/it] 15%|█▌ | 9316/61904 [4:39:11<20:30:43, 1.40s/it] 15%|█▌ | 9317/61904 [4:39:12<20:23:14, 1.40s/it] 15%|█▌ | 9318/61904 [4:39:14<20:30:47, 1.40s/it] 15%|█▌ | 9319/61904 [4:39:15<20:07:27, 1.38s/it] 15%|█▌ | 9320/61904 [4:39:16<19:26:37, 1.33s/it] {'loss': 2.8015, 'learning_rate': 1.8521975884869698e-07, 'epoch': 2.41} + 15%|█▌ | 9320/61904 [4:39:16<19:26:37, 1.33s/it] 15%|█▌ | 9321/61904 [4:39:18<19:17:45, 1.32s/it] 15%|█▌ | 9322/61904 [4:39:19<19:28:48, 1.33s/it] 15%|█▌ | 9323/61904 [4:39:20<19:12:25, 1.32s/it] 15%|█▌ | 9324/61904 [4:39:22<20:18:21, 1.39s/it] 15%|█▌ | 9325/61904 [4:39:23<20:22:53, 1.40s/it] 15%|█▌ | 9326/61904 [4:39:24<19:41:33, 1.35s/it] 15%|█▌ | 9327/61904 [4:39:26<19:43:46, 1.35s/it] 15%|█▌ | 9328/61904 [4:39:27<19:34:59, 1.34s/it] 15%|█▌ | 9329/61904 [4:39:28<19:21:51, 1.33s/it] 15%|█▌ | 9330/61904 [4:39:30<20:26:21, 1.40s/it] 15%|█▌ | 9331/61904 [4:39:32<20:58:33, 1.44s/it] 15%|█▌ | 9332/61904 [4:39:33<20:25:12, 1.40s/it] 15%|█▌ | 9333/61904 [4:39:34<21:27:08, 1.47s/it] 15%|█▌ | 9334/61904 [4:39:36<20:16:20, 1.39s/it] 15%|█▌ | 9335/61904 [4:39:37<20:37:45, 1.41s/it] 15%|█▌ | 9336/61904 [4:39:39<20:29:33, 1.40s/it] 15%|█▌ | 9337/61904 [4:39:40<19:54:30, 1.36s/it] 15%|█▌ | 9338/61904 [4:39:41<19:45:44, 1.35s/it] 15%|█▌ | 9339/61904 [4:39:42<19:21:51, 1.33s/it] 15%|█▌ | 9340/61904 [4:39:44<19:08:31, 1.31s/it] {'loss': 2.8752, 'learning_rate': 1.8518734603915467e-07, 'epoch': 2.41} + 15%|█▌ | 9340/61904 [4:39:44<19:08:31, 1.31s/it] 15%|█▌ | 9341/61904 [4:39:45<19:25:15, 1.33s/it] 15%|█▌ | 9342/61904 [4:39:46<19:57:18, 1.37s/it] 15%|█▌ | 9343/61904 [4:39:48<19:49:21, 1.36s/it] 15%|█▌ | 9344/61904 [4:39:49<20:09:06, 1.38s/it] 15%|█▌ | 9345/61904 [4:39:51<19:36:39, 1.34s/it] 15%|█▌ | 9346/61904 [4:39:52<19:33:01, 1.34s/it] 15%|█▌ | 9347/61904 [4:39:53<19:49:46, 1.36s/it] 15%|█▌ | 9348/61904 [4:39:55<19:52:00, 1.36s/it] 15%|█▌ | 9349/61904 [4:39:56<19:36:35, 1.34s/it] 15%|█▌ | 9350/61904 [4:39:57<20:23:16, 1.40s/it] 15%|█▌ | 9351/61904 [4:39:59<20:38:23, 1.41s/it] 15%|█▌ | 9352/61904 [4:40:00<20:15:09, 1.39s/it] 15%|█▌ | 9353/61904 [4:40:02<20:30:53, 1.41s/it] 15%|█▌ | 9354/61904 [4:40:03<20:20:03, 1.39s/it] 15%|█▌ | 9355/61904 [4:40:04<19:56:54, 1.37s/it] 15%|█▌ | 9356/61904 [4:40:06<20:52:11, 1.43s/it] 15%|█▌ | 9357/61904 [4:40:07<20:56:34, 1.43s/it] 15%|█▌ | 9358/61904 [4:40:09<20:30:00, 1.40s/it] 15%|█▌ | 9359/61904 [4:40:10<19:55:32, 1.37s/it] 15%|█▌ | 9360/61904 [4:40:11<19:50:05, 1.36s/it] {'loss': 2.7702, 'learning_rate': 1.8515493322961233e-07, 'epoch': 2.42} + 15%|█▌ | 9360/61904 [4:40:11<19:50:05, 1.36s/it] 15%|█▌ | 9361/61904 [4:40:13<19:39:03, 1.35s/it] 15%|█▌ | 9362/61904 [4:40:14<20:08:03, 1.38s/it] 15%|█▌ | 9363/61904 [4:40:15<19:39:00, 1.35s/it] 15%|█▌ | 9364/61904 [4:40:17<20:08:14, 1.38s/it] 15%|█▌ | 9365/61904 [4:40:18<19:49:39, 1.36s/it] 15%|█▌ | 9366/61904 [4:40:19<19:52:31, 1.36s/it] 15%|█▌ | 9367/61904 [4:40:21<20:00:47, 1.37s/it] 15%|█▌ | 9368/61904 [4:40:22<19:54:48, 1.36s/it] 15%|█▌ | 9369/61904 [4:40:24<19:37:34, 1.34s/it] 15%|█▌ | 9370/61904 [4:40:25<20:13:23, 1.39s/it] 15%|█▌ | 9371/61904 [4:40:26<19:27:07, 1.33s/it] 15%|█▌ | 9372/61904 [4:40:27<19:05:03, 1.31s/it] 15%|█▌ | 9373/61904 [4:40:29<19:27:55, 1.33s/it] 15%|█▌ | 9374/61904 [4:40:30<19:49:02, 1.36s/it] 15%|█▌ | 9375/61904 [4:40:32<20:40:08, 1.42s/it] 15%|█▌ | 9376/61904 [4:40:33<20:17:57, 1.39s/it] 15%|█▌ | 9377/61904 [4:40:35<21:05:42, 1.45s/it] 15%|█▌ | 9378/61904 [4:40:36<20:58:44, 1.44s/it] 15%|█▌ | 9379/61904 [4:40:38<20:34:19, 1.41s/it] 15%|█▌ | 9380/61904 [4:40:39<20:18:03, 1.39s/it] {'loss': 2.9099, 'learning_rate': 1.8512252042007e-07, 'epoch': 2.42} + 15%|█▌ | 9380/61904 [4:40:39<20:18:03, 1.39s/it] 15%|█▌ | 9381/61904 [4:40:40<20:45:04, 1.42s/it] 15%|█▌ | 9382/61904 [4:40:42<20:12:45, 1.39s/it] 15%|█▌ | 9383/61904 [4:40:43<19:17:49, 1.32s/it] 15%|█▌ | 9384/61904 [4:40:44<19:57:09, 1.37s/it] 15%|█▌ | 9385/61904 [4:40:46<19:55:31, 1.37s/it] 15%|█▌ | 9386/61904 [4:40:47<20:43:54, 1.42s/it] 15%|█▌ | 9387/61904 [4:40:49<21:09:02, 1.45s/it] 15%|█▌ | 9388/61904 [4:40:50<20:34:31, 1.41s/it] 15%|█▌ | 9389/61904 [4:40:51<20:10:45, 1.38s/it] 15%|█▌ | 9390/61904 [4:40:53<19:42:35, 1.35s/it] 15%|█▌ | 9391/61904 [4:40:54<19:19:43, 1.33s/it] 15%|█▌ | 9392/61904 [4:40:55<19:22:52, 1.33s/it] 15%|█▌ | 9393/61904 [4:40:57<19:29:27, 1.34s/it] 15%|█▌ | 9394/61904 [4:40:58<20:19:14, 1.39s/it] 15%|█▌ | 9395/61904 [4:40:59<19:56:06, 1.37s/it] 15%|█▌ | 9396/61904 [4:41:01<20:17:09, 1.39s/it] 15%|█▌ | 9397/61904 [4:41:02<20:08:57, 1.38s/it] 15%|█▌ | 9398/61904 [4:41:04<20:39:55, 1.42s/it] 15%|█▌ | 9399/61904 [4:41:05<20:10:51, 1.38s/it] 15%|█▌ | 9400/61904 [4:41:06<20:17:05, 1.39s/it] {'loss': 2.8563, 'learning_rate': 1.8509010761052768e-07, 'epoch': 2.43} + 15%|█▌ | 9400/61904 [4:41:06<20:17:05, 1.39s/it] 15%|█▌ | 9401/61904 [4:41:08<19:56:52, 1.37s/it] 15%|█▌ | 9402/61904 [4:41:09<19:37:51, 1.35s/it] 15%|█▌ | 9403/61904 [4:41:10<19:23:56, 1.33s/it] 15%|█▌ | 9404/61904 [4:41:12<19:03:41, 1.31s/it] 15%|█▌ | 9405/61904 [4:41:13<18:59:23, 1.30s/it] 15%|█▌ | 9406/61904 [4:41:14<19:08:28, 1.31s/it] 15%|█▌ | 9407/61904 [4:41:16<19:03:10, 1.31s/it] 15%|█▌ | 9408/61904 [4:41:17<18:46:33, 1.29s/it] 15%|█▌ | 9409/61904 [4:41:18<19:26:43, 1.33s/it] 15%|█▌ | 9410/61904 [4:41:20<19:59:43, 1.37s/it] 15%|█▌ | 9411/61904 [4:41:21<20:20:24, 1.39s/it] 15%|█▌ | 9412/61904 [4:41:23<20:26:54, 1.40s/it] 15%|█▌ | 9413/61904 [4:41:24<19:58:56, 1.37s/it] 15%|█▌ | 9414/61904 [4:41:25<19:31:02, 1.34s/it] 15%|█▌ | 9415/61904 [4:41:26<19:25:24, 1.33s/it] 15%|█▌ | 9416/61904 [4:41:28<19:02:25, 1.31s/it] 15%|█▌ | 9417/61904 [4:41:29<19:32:24, 1.34s/it] 15%|█▌ | 9418/61904 [4:41:30<19:35:22, 1.34s/it] 15%|█▌ | 9419/61904 [4:41:32<20:36:02, 1.41s/it] 15%|█▌ | 9420/61904 [4:41:33<20:34:12, 1.41s/it] {'loss': 2.7581, 'learning_rate': 1.8505769480098534e-07, 'epoch': 2.43} + 15%|█▌ | 9420/61904 [4:41:33<20:34:12, 1.41s/it] 15%|█▌ | 9421/61904 [4:41:35<20:44:43, 1.42s/it] 15%|█▌ | 9422/61904 [4:41:36<20:53:11, 1.43s/it] 15%|█▌ | 9423/61904 [4:41:38<20:10:28, 1.38s/it] 15%|█▌ | 9424/61904 [4:41:39<20:00:07, 1.37s/it] 15%|█▌ | 9425/61904 [4:41:40<20:37:10, 1.41s/it] 15%|█▌ | 9426/61904 [4:41:42<20:24:43, 1.40s/it] 15%|█▌ | 9427/61904 [4:41:43<21:03:09, 1.44s/it] 15%|█▌ | 9428/61904 [4:41:45<21:01:30, 1.44s/it] 15%|█▌ | 9429/61904 [4:41:46<21:01:48, 1.44s/it] 15%|█▌ | 9430/61904 [4:41:47<20:05:23, 1.38s/it] 15%|█▌ | 9431/61904 [4:41:49<19:54:42, 1.37s/it] 15%|█▌ | 9432/61904 [4:41:50<19:45:49, 1.36s/it] 15%|█▌ | 9433/61904 [4:41:52<20:41:41, 1.42s/it] 15%|█▌ | 9434/61904 [4:41:53<20:25:31, 1.40s/it] 15%|█▌ | 9435/61904 [4:41:54<19:59:32, 1.37s/it] 15%|█▌ | 9436/61904 [4:41:56<20:20:47, 1.40s/it] 15%|█▌ | 9437/61904 [4:41:57<19:55:51, 1.37s/it] 15%|█▌ | 9438/61904 [4:41:59<20:12:41, 1.39s/it] 15%|█▌ | 9439/61904 [4:42:00<19:51:41, 1.36s/it] 15%|█▌ | 9440/61904 [4:42:01<20:09:53, 1.38s/it] {'loss': 2.9199, 'learning_rate': 1.85025281991443e-07, 'epoch': 2.44} + 15%|█▌ | 9440/61904 [4:42:01<20:09:53, 1.38s/it] 15%|█▌ | 9441/61904 [4:42:03<20:33:27, 1.41s/it] 15%|█▌ | 9442/61904 [4:42:04<20:14:39, 1.39s/it] 15%|█▌ | 9443/61904 [4:42:06<20:18:45, 1.39s/it] 15%|█▌ | 9444/61904 [4:42:07<19:45:47, 1.36s/it] 15%|█▌ | 9445/61904 [4:42:08<19:42:13, 1.35s/it] 15%|█▌ | 9446/61904 [4:42:10<20:00:41, 1.37s/it] 15%|█▌ | 9447/61904 [4:42:11<20:11:21, 1.39s/it] 15%|█▌ | 9448/61904 [4:42:12<19:21:32, 1.33s/it] 15%|█▌ | 9449/61904 [4:42:14<20:34:12, 1.41s/it] 15%|█▌ | 9450/61904 [4:42:15<20:25:33, 1.40s/it] 15%|█▌ | 9451/61904 [4:42:17<20:43:35, 1.42s/it] 15%|█▌ | 9452/61904 [4:42:18<20:35:15, 1.41s/it] 15%|█▌ | 9453/61904 [4:42:19<20:40:05, 1.42s/it] 15%|█▌ | 9454/61904 [4:42:21<19:42:57, 1.35s/it] 15%|█▌ | 9455/61904 [4:42:22<19:04:52, 1.31s/it] 15%|█▌ | 9456/61904 [4:42:23<18:33:41, 1.27s/it] 15%|█▌ | 9457/61904 [4:42:24<19:19:54, 1.33s/it] 15%|█▌ | 9458/61904 [4:42:26<19:41:31, 1.35s/it] 15%|█▌ | 9459/61904 [4:42:27<19:34:57, 1.34s/it] 15%|█▌ | 9460/61904 [4:42:29<19:46:10, 1.36s/it] {'loss': 2.8404, 'learning_rate': 1.849928691819007e-07, 'epoch': 2.44} + 15%|█▌ | 9460/61904 [4:42:29<19:46:10, 1.36s/it] 15%|█▌ | 9461/61904 [4:42:30<20:04:16, 1.38s/it] 15%|█▌ | 9462/61904 [4:42:31<19:42:01, 1.35s/it] 15%|█▌ | 9463/61904 [4:42:33<19:48:38, 1.36s/it] 15%|█▌ | 9464/61904 [4:42:34<19:19:55, 1.33s/it] 15%|█▌ | 9465/61904 [4:42:35<19:25:26, 1.33s/it] 15%|█▌ | 9466/61904 [4:42:37<19:06:20, 1.31s/it] 15%|█▌ | 9467/61904 [4:42:38<18:54:49, 1.30s/it] 15%|█▌ | 9468/61904 [4:42:39<19:21:42, 1.33s/it] 15%|█▌ | 9469/61904 [4:42:41<19:35:21, 1.34s/it] 15%|█▌ | 9470/61904 [4:42:42<19:09:30, 1.32s/it] 15%|█▌ | 9471/61904 [4:42:43<19:38:24, 1.35s/it] 15%|█▌ | 9472/61904 [4:42:45<19:21:11, 1.33s/it] 15%|█▌ | 9473/61904 [4:42:46<19:46:54, 1.36s/it] 15%|█▌ | 9474/61904 [4:42:47<19:51:16, 1.36s/it] 15%|█▌ | 9475/61904 [4:42:49<19:52:11, 1.36s/it] 15%|█▌ | 9476/61904 [4:42:50<20:51:53, 1.43s/it] 15%|█▌ | 9477/61904 [4:42:52<20:06:46, 1.38s/it] 15%|█▌ | 9478/61904 [4:42:53<20:09:13, 1.38s/it] 15%|█▌ | 9479/61904 [4:42:54<20:27:22, 1.40s/it] 15%|█▌ | 9480/61904 [4:42:56<20:04:50, 1.38s/it] {'loss': 2.8321, 'learning_rate': 1.8496045637235835e-07, 'epoch': 2.45} + 15%|█▌ | 9480/61904 [4:42:56<20:04:50, 1.38s/it] 15%|█▌ | 9481/61904 [4:42:57<20:18:21, 1.39s/it] 15%|█▌ | 9482/61904 [4:42:59<20:26:57, 1.40s/it] 15%|█▌ | 9483/61904 [4:43:00<19:45:40, 1.36s/it] 15%|█▌ | 9484/61904 [4:43:01<19:57:48, 1.37s/it] 15%|█▌ | 9485/61904 [4:43:03<19:28:47, 1.34s/it] 15%|█▌ | 9486/61904 [4:43:04<19:00:48, 1.31s/it] 15%|█▌ | 9487/61904 [4:43:05<19:20:34, 1.33s/it] 15%|█▌ | 9488/61904 [4:43:07<20:05:16, 1.38s/it] 15%|█▌ | 9489/61904 [4:43:08<19:43:00, 1.35s/it] 15%|█▌ | 9490/61904 [4:43:09<19:06:03, 1.31s/it] 15%|█▌ | 9491/61904 [4:43:10<18:36:38, 1.28s/it] 15%|█▌ | 9492/61904 [4:43:12<18:37:10, 1.28s/it] 15%|█▌ | 9493/61904 [4:43:13<18:34:58, 1.28s/it] 15%|█▌ | 9494/61904 [4:43:14<18:39:05, 1.28s/it] 15%|█▌ | 9495/61904 [4:43:16<19:06:05, 1.31s/it] 15%|█▌ | 9496/61904 [4:43:17<19:13:10, 1.32s/it] 15%|█▌ | 9497/61904 [4:43:18<19:43:21, 1.35s/it] 15%|█▌ | 9498/61904 [4:43:20<19:44:17, 1.36s/it] 15%|█▌ | 9499/61904 [4:43:21<19:46:34, 1.36s/it] 15%|█▌ | 9500/61904 [4:43:22<19:31:30, 1.34s/it] {'loss': 2.781, 'learning_rate': 1.8492804356281602e-07, 'epoch': 2.46} + 15%|█▌ | 9500/61904 [4:43:22<19:31:30, 1.34s/it] 15%|█▌ | 9501/61904 [4:43:24<19:13:11, 1.32s/it] 15%|█▌ | 9502/61904 [4:43:25<19:29:04, 1.34s/it] 15%|█▌ | 9503/61904 [4:43:26<19:36:21, 1.35s/it] 15%|█▌ | 9504/61904 [4:43:28<19:59:23, 1.37s/it] 15%|█▌ | 9505/61904 [4:43:29<19:41:37, 1.35s/it] 15%|█▌ | 9506/61904 [4:43:31<20:14:08, 1.39s/it] 15%|█▌ | 9507/61904 [4:43:32<19:56:53, 1.37s/it] 15%|█▌ | 9508/61904 [4:43:33<19:43:58, 1.36s/it] 15%|█▌ | 9509/61904 [4:43:35<20:18:00, 1.39s/it] 15%|█▌ | 9510/61904 [4:43:36<19:51:07, 1.36s/it] 15%|█▌ | 9511/61904 [4:43:37<20:09:37, 1.39s/it] 15%|█▌ | 9512/61904 [4:43:39<20:04:16, 1.38s/it] 15%|█▌ | 9513/61904 [4:43:40<19:35:12, 1.35s/it] 15%|█▌ | 9514/61904 [4:43:41<19:11:41, 1.32s/it] 15%|█▌ | 9515/61904 [4:43:43<19:37:50, 1.35s/it] 15%|█▌ | 9516/61904 [4:43:44<19:42:45, 1.35s/it] 15%|█▌ | 9517/61904 [4:43:45<19:38:08, 1.35s/it] 15%|█▌ | 9518/61904 [4:43:47<19:57:28, 1.37s/it] 15%|█▌ | 9519/61904 [4:43:48<20:12:35, 1.39s/it] 15%|█▌ | 9520/61904 [4:43:50<20:54:18, 1.44s/it] {'loss': 2.9004, 'learning_rate': 1.8489563075327368e-07, 'epoch': 2.46} + 15%|█▌ | 9520/61904 [4:43:50<20:54:18, 1.44s/it] 15%|█▌ | 9521/61904 [4:43:51<20:35:39, 1.42s/it] 15%|█▌ | 9522/61904 [4:43:53<20:47:02, 1.43s/it] 15%|█▌ | 9523/61904 [4:43:54<20:48:36, 1.43s/it] 15%|█▌ | 9524/61904 [4:43:55<20:01:08, 1.38s/it] 15%|█▌ | 9525/61904 [4:43:57<20:05:59, 1.38s/it] 15%|█▌ | 9526/61904 [4:43:58<21:29:20, 1.48s/it] 15%|█▌ | 9527/61904 [4:44:00<21:47:47, 1.50s/it] 15%|█▌ | 9528/61904 [4:44:02<21:41:08, 1.49s/it] 15%|█▌ | 9529/61904 [4:44:03<21:38:56, 1.49s/it] 15%|█▌ | 9530/61904 [4:44:04<20:44:24, 1.43s/it] 15%|█▌ | 9531/61904 [4:44:06<20:27:50, 1.41s/it] 15%|█▌ | 9532/61904 [4:44:07<21:10:16, 1.46s/it] 15%|█▌ | 9533/61904 [4:44:09<21:00:34, 1.44s/it] 15%|█▌ | 9534/61904 [4:44:10<20:08:48, 1.38s/it] 15%|█▌ | 9535/61904 [4:44:11<19:48:47, 1.36s/it] 15%|█▌ | 9536/61904 [4:44:13<19:57:18, 1.37s/it] 15%|█▌ | 9537/61904 [4:44:14<19:39:37, 1.35s/it] 15%|█▌ | 9538/61904 [4:44:15<19:49:03, 1.36s/it] 15%|█▌ | 9539/61904 [4:44:17<19:32:32, 1.34s/it] 15%|█▌ | 9540/61904 [4:44:18<20:02:44, 1.38s/it] {'loss': 2.8439, 'learning_rate': 1.8486321794373134e-07, 'epoch': 2.47} + 15%|█▌ | 9540/61904 [4:44:18<20:02:44, 1.38s/it] 15%|█▌ | 9541/61904 [4:44:19<19:43:21, 1.36s/it] 15%|█▌ | 9542/61904 [4:44:21<19:24:33, 1.33s/it] 15%|█▌ | 9543/61904 [4:44:22<19:42:43, 1.36s/it] 15%|█▌ | 9544/61904 [4:44:23<19:40:33, 1.35s/it] 15%|█▌ | 9545/61904 [4:44:25<20:14:24, 1.39s/it] 15%|█▌ | 9546/61904 [4:44:26<19:57:45, 1.37s/it] 15%|█▌ | 9547/61904 [4:44:28<19:50:46, 1.36s/it] 15%|█▌ | 9548/61904 [4:44:29<19:46:47, 1.36s/it] 15%|█▌ | 9549/61904 [4:44:30<20:08:57, 1.39s/it] 15%|█▌ | 9550/61904 [4:44:32<21:12:02, 1.46s/it] 15%|█▌ | 9551/61904 [4:44:33<20:56:23, 1.44s/it] 15%|█▌ | 9552/61904 [4:44:35<20:38:30, 1.42s/it] 15%|█▌ | 9553/61904 [4:44:36<20:31:50, 1.41s/it] 15%|█▌ | 9554/61904 [4:44:37<20:28:05, 1.41s/it] 15%|█▌ | 9555/61904 [4:44:39<20:17:37, 1.40s/it] 15%|█▌ | 9556/61904 [4:44:40<20:47:25, 1.43s/it] 15%|█▌ | 9557/61904 [4:44:42<20:27:28, 1.41s/it] 15%|█▌ | 9558/61904 [4:44:43<19:36:54, 1.35s/it] 15%|█▌ | 9559/61904 [4:44:44<20:10:12, 1.39s/it] 15%|█▌ | 9560/61904 [4:44:46<19:49:00, 1.36s/it] {'loss': 2.9077, 'learning_rate': 1.8483080513418903e-07, 'epoch': 2.47} + 15%|█▌ | 9560/61904 [4:44:46<19:49:00, 1.36s/it] 15%|█▌ | 9561/61904 [4:44:47<20:09:07, 1.39s/it] 15%|█▌ | 9562/61904 [4:44:48<19:51:46, 1.37s/it] 15%|█▌ | 9563/61904 [4:44:50<20:13:24, 1.39s/it] 15%|█▌ | 9564/61904 [4:44:51<19:44:04, 1.36s/it] 15%|█▌ | 9565/61904 [4:44:52<19:17:56, 1.33s/it] 15%|█▌ | 9566/61904 [4:44:54<19:35:13, 1.35s/it] 15%|█▌ | 9567/61904 [4:44:55<19:25:00, 1.34s/it] 15%|█▌ | 9568/61904 [4:44:57<20:07:29, 1.38s/it] 15%|█▌ | 9569/61904 [4:44:58<19:45:57, 1.36s/it] 15%|█▌ | 9570/61904 [4:44:59<19:55:35, 1.37s/it] 15%|█▌ | 9571/61904 [4:45:01<19:55:59, 1.37s/it] 15%|█▌ | 9572/61904 [4:45:02<19:38:09, 1.35s/it] 15%|█▌ | 9573/61904 [4:45:03<19:21:03, 1.33s/it] 15%|█▌ | 9574/61904 [4:45:05<19:35:48, 1.35s/it] 15%|█▌ | 9575/61904 [4:45:06<19:55:31, 1.37s/it] 15%|█▌ | 9576/61904 [4:45:07<19:50:26, 1.36s/it] 15%|█▌ | 9577/61904 [4:45:09<21:04:43, 1.45s/it] 15%|█▌ | 9578/61904 [4:45:11<20:57:35, 1.44s/it] 15%|█▌ | 9579/61904 [4:45:12<21:03:00, 1.45s/it] 15%|█▌ | 9580/61904 [4:45:13<20:54:27, 1.44s/it] {'loss': 2.8844, 'learning_rate': 1.847983923246467e-07, 'epoch': 2.48} + 15%|█▌ | 9580/61904 [4:45:13<20:54:27, 1.44s/it] 15%|█▌ | 9581/61904 [4:45:15<20:28:33, 1.41s/it] 15%|█▌ | 9582/61904 [4:45:16<20:18:39, 1.40s/it] 15%|█▌ | 9583/61904 [4:45:17<19:37:10, 1.35s/it] 15%|█▌ | 9584/61904 [4:45:19<19:55:34, 1.37s/it] 15%|█▌ | 9585/61904 [4:45:20<19:31:50, 1.34s/it] 15%|█▌ | 9586/61904 [4:45:21<19:41:48, 1.36s/it] 15%|█▌ | 9587/61904 [4:45:23<19:33:59, 1.35s/it] 15%|█▌ | 9588/61904 [4:45:24<20:29:12, 1.41s/it] 15%|█▌ | 9589/61904 [4:45:26<20:16:08, 1.39s/it] 15%|█▌ | 9590/61904 [4:45:27<19:56:15, 1.37s/it] 15%|█▌ | 9591/61904 [4:45:28<19:53:32, 1.37s/it] 15%|█▌ | 9592/61904 [4:45:30<20:45:02, 1.43s/it] 15%|█▌ | 9593/61904 [4:45:31<20:26:49, 1.41s/it] 15%|█▌ | 9594/61904 [4:45:33<20:27:00, 1.41s/it] 15%|█▌ | 9595/61904 [4:45:34<19:55:36, 1.37s/it] 16%|█▌ | 9596/61904 [4:45:35<19:45:04, 1.36s/it] 16%|█▌ | 9597/61904 [4:45:37<19:58:15, 1.37s/it] 16%|█▌ | 9598/61904 [4:45:38<19:36:59, 1.35s/it] 16%|█▌ | 9599/61904 [4:45:39<19:32:58, 1.35s/it] 16%|█▌ | 9600/61904 [4:45:41<19:14:35, 1.32s/it] {'loss': 2.8135, 'learning_rate': 1.8476597951510435e-07, 'epoch': 2.48} + 16%|█▌ | 9600/61904 [4:45:41<19:14:35, 1.32s/it] 16%|█▌ | 9601/61904 [4:45:42<19:25:34, 1.34s/it] 16%|█▌ | 9602/61904 [4:45:43<19:37:12, 1.35s/it] 16%|█▌ | 9603/61904 [4:45:45<19:43:52, 1.36s/it] 16%|█▌ | 9604/61904 [4:45:46<19:13:37, 1.32s/it] 16%|█▌ | 9605/61904 [4:45:47<19:19:25, 1.33s/it] 16%|█▌ | 9606/61904 [4:45:49<19:50:01, 1.37s/it] 16%|█▌ | 9607/61904 [4:45:50<19:47:24, 1.36s/it] 16%|█▌ | 9608/61904 [4:45:51<19:30:08, 1.34s/it] 16%|█▌ | 9609/61904 [4:45:53<19:47:23, 1.36s/it] 16%|█▌ | 9610/61904 [4:45:54<20:31:51, 1.41s/it] 16%|█▌ | 9611/61904 [4:45:56<19:59:27, 1.38s/it] 16%|█▌ | 9612/61904 [4:45:57<19:33:25, 1.35s/it] 16%|█▌ | 9613/61904 [4:45:58<19:18:29, 1.33s/it] 16%|█▌ | 9614/61904 [4:46:00<20:01:13, 1.38s/it] 16%|█▌ | 9615/61904 [4:46:01<20:08:27, 1.39s/it] 16%|█▌ | 9616/61904 [4:46:03<20:47:11, 1.43s/it] 16%|█▌ | 9617/61904 [4:46:04<20:49:10, 1.43s/it] 16%|█▌ | 9618/61904 [4:46:05<20:08:44, 1.39s/it] 16%|█▌ | 9619/61904 [4:46:07<20:21:15, 1.40s/it] 16%|█▌ | 9620/61904 [4:46:08<20:01:26, 1.38s/it] {'loss': 2.8818, 'learning_rate': 1.8473356670556204e-07, 'epoch': 2.49} + 16%|█▌ | 9620/61904 [4:46:08<20:01:26, 1.38s/it] 16%|█▌ | 9621/61904 [4:46:10<20:03:21, 1.38s/it] 16%|█▌ | 9622/61904 [4:46:11<20:57:36, 1.44s/it] 16%|█▌ | 9623/61904 [4:46:13<20:55:35, 1.44s/it] 16%|█▌ | 9624/61904 [4:46:14<20:20:00, 1.40s/it] 16%|█▌ | 9625/61904 [4:46:15<20:17:07, 1.40s/it] 16%|█▌ | 9626/61904 [4:46:17<20:07:32, 1.39s/it] 16%|█▌ | 9627/61904 [4:46:18<19:28:09, 1.34s/it] 16%|█▌ | 9628/61904 [4:46:19<19:48:14, 1.36s/it] 16%|█▌ | 9629/61904 [4:46:21<19:53:48, 1.37s/it] 16%|█▌ | 9630/61904 [4:46:22<19:38:33, 1.35s/it] 16%|█▌ | 9631/61904 [4:46:24<20:26:24, 1.41s/it] 16%|█▌ | 9632/61904 [4:46:25<20:11:08, 1.39s/it] 16%|█▌ | 9633/61904 [4:46:26<20:04:05, 1.38s/it] 16%|█▌ | 9634/61904 [4:46:28<20:21:33, 1.40s/it] 16%|█▌ | 9635/61904 [4:46:29<20:41:02, 1.42s/it] 16%|█▌ | 9636/61904 [4:46:30<20:10:17, 1.39s/it] 16%|█▌ | 9637/61904 [4:46:32<19:31:12, 1.34s/it] 16%|█▌ | 9638/61904 [4:46:33<19:39:52, 1.35s/it] 16%|█▌ | 9639/61904 [4:46:35<20:10:14, 1.39s/it] 16%|█▌ | 9640/61904 [4:46:36<21:21:36, 1.47s/it] {'loss': 2.8716, 'learning_rate': 1.847011538960197e-07, 'epoch': 2.49} + 16%|█▌ | 9640/61904 [4:46:36<21:21:36, 1.47s/it] 16%|█▌ | 9641/61904 [4:46:37<20:03:27, 1.38s/it] 16%|█▌ | 9642/61904 [4:46:39<19:45:13, 1.36s/it] 16%|█▌ | 9643/61904 [4:46:40<19:25:05, 1.34s/it] 16%|█▌ | 9644/61904 [4:46:41<19:22:42, 1.33s/it] 16%|█▌ | 9645/61904 [4:46:43<19:28:28, 1.34s/it] 16%|█▌ | 9646/61904 [4:46:44<19:35:18, 1.35s/it] 16%|█▌ | 9647/61904 [4:46:45<19:34:27, 1.35s/it] 16%|█▌ | 9648/61904 [4:46:47<19:26:37, 1.34s/it] 16%|█▌ | 9649/61904 [4:46:48<19:38:46, 1.35s/it] 16%|█▌ | 9650/61904 [4:46:49<19:49:04, 1.37s/it] 16%|█▌ | 9651/61904 [4:46:51<19:56:51, 1.37s/it] 16%|█▌ | 9652/61904 [4:46:52<19:08:58, 1.32s/it] 16%|█▌ | 9653/61904 [4:46:54<19:40:49, 1.36s/it] 16%|█▌ | 9654/61904 [4:46:55<19:46:39, 1.36s/it] 16%|█▌ | 9655/61904 [4:46:56<19:40:59, 1.36s/it] 16%|█▌ | 9656/61904 [4:46:58<20:00:47, 1.38s/it] 16%|█▌ | 9657/61904 [4:46:59<19:56:19, 1.37s/it] 16%|█▌ | 9658/61904 [4:47:00<19:39:31, 1.35s/it] 16%|█▌ | 9659/61904 [4:47:02<19:32:47, 1.35s/it] 16%|█▌ | 9660/61904 [4:47:03<19:49:19, 1.37s/it] {'loss': 2.8514, 'learning_rate': 1.8466874108647736e-07, 'epoch': 2.5} + 16%|█▌ | 9660/61904 [4:47:03<19:49:19, 1.37s/it] 16%|█▌ | 9661/61904 [4:47:04<19:35:02, 1.35s/it] 16%|█▌ | 9662/61904 [4:47:06<19:17:32, 1.33s/it] 16%|█▌ | 9663/61904 [4:47:07<19:46:08, 1.36s/it] 16%|█▌ | 9664/61904 [4:47:08<19:21:27, 1.33s/it] 16%|█▌ | 9665/61904 [4:47:10<20:24:56, 1.41s/it] 16%|█▌ | 9666/61904 [4:47:11<20:48:57, 1.43s/it] 16%|█▌ | 9667/61904 [4:47:13<20:20:40, 1.40s/it] 16%|█▌ | 9668/61904 [4:47:14<20:08:16, 1.39s/it] 16%|█▌ | 9669/61904 [4:47:15<19:33:16, 1.35s/it] 16%|█▌ | 9670/61904 [4:47:17<20:00:22, 1.38s/it] 16%|█▌ | 9671/61904 [4:47:18<19:58:54, 1.38s/it] 16%|█▌ | 9672/61904 [4:47:20<20:01:14, 1.38s/it] 16%|█▌ | 9673/61904 [4:47:21<20:18:16, 1.40s/it] 16%|█▌ | 9674/61904 [4:47:22<20:13:22, 1.39s/it] 16%|█▌ | 9675/61904 [4:47:24<20:36:54, 1.42s/it] 16%|█▌ | 9676/61904 [4:47:25<20:18:17, 1.40s/it] 16%|█▌ | 9677/61904 [4:47:27<20:14:07, 1.39s/it] 16%|█▌ | 9678/61904 [4:47:28<19:56:59, 1.38s/it] 16%|█▌ | 9679/61904 [4:47:29<19:31:32, 1.35s/it] 16%|█▌ | 9680/61904 [4:47:31<19:54:00, 1.37s/it] {'loss': 2.8514, 'learning_rate': 1.8463632827693505e-07, 'epoch': 2.5} + 16%|█▌ | 9680/61904 [4:47:31<19:54:00, 1.37s/it] 16%|█▌ | 9681/61904 [4:47:32<20:04:46, 1.38s/it] 16%|█▌ | 9682/61904 [4:47:33<19:40:05, 1.36s/it] 16%|█▌ | 9683/61904 [4:47:35<19:41:34, 1.36s/it] 16%|█▌ | 9684/61904 [4:47:36<19:18:16, 1.33s/it] 16%|█▌ | 9685/61904 [4:47:37<19:46:17, 1.36s/it] 16%|█▌ | 9686/61904 [4:47:39<18:53:37, 1.30s/it] 16%|█▌ | 9687/61904 [4:47:40<18:50:04, 1.30s/it] 16%|█▌ | 9688/61904 [4:47:41<19:34:20, 1.35s/it] 16%|█▌ | 9689/61904 [4:47:43<19:41:48, 1.36s/it] 16%|█▌ | 9690/61904 [4:47:44<19:31:44, 1.35s/it] 16%|█▌ | 9691/61904 [4:47:45<19:19:35, 1.33s/it] 16%|█▌ | 9692/61904 [4:47:47<19:23:29, 1.34s/it] 16%|█▌ | 9693/61904 [4:47:48<18:51:15, 1.30s/it] 16%|█▌ | 9694/61904 [4:47:49<18:50:52, 1.30s/it] 16%|█▌ | 9695/61904 [4:47:51<18:56:52, 1.31s/it] 16%|█▌ | 9696/61904 [4:47:52<18:37:24, 1.28s/it] 16%|█▌ | 9697/61904 [4:47:53<18:32:17, 1.28s/it] 16%|█▌ | 9698/61904 [4:47:54<19:04:48, 1.32s/it] 16%|█▌ | 9699/61904 [4:47:56<19:24:53, 1.34s/it] 16%|█▌ | 9700/61904 [4:47:57<19:13:21, 1.33s/it] {'loss': 2.8991, 'learning_rate': 1.846039154673927e-07, 'epoch': 2.51} + 16%|█▌ | 9700/61904 [4:47:57<19:13:21, 1.33s/it] 16%|█▌ | 9701/61904 [4:47:58<19:16:05, 1.33s/it] 16%|█▌ | 9702/61904 [4:48:00<19:09:52, 1.32s/it] 16%|█▌ | 9703/61904 [4:48:01<19:06:52, 1.32s/it] 16%|█▌ | 9704/61904 [4:48:02<19:09:27, 1.32s/it] 16%|█▌ | 9705/61904 [4:48:04<19:29:00, 1.34s/it] 16%|█▌ | 9706/61904 [4:48:05<19:41:27, 1.36s/it] 16%|█▌ | 9707/61904 [4:48:07<20:20:22, 1.40s/it] 16%|█▌ | 9708/61904 [4:48:08<19:53:44, 1.37s/it] 16%|█▌ | 9709/61904 [4:48:09<19:45:23, 1.36s/it] 16%|█▌ | 9710/61904 [4:48:11<19:37:17, 1.35s/it] 16%|█▌ | 9711/61904 [4:48:12<19:14:18, 1.33s/it] 16%|█▌ | 9712/61904 [4:48:13<19:00:26, 1.31s/it] 16%|█▌ | 9713/61904 [4:48:14<18:42:14, 1.29s/it] 16%|█▌ | 9714/61904 [4:48:16<19:21:49, 1.34s/it] 16%|█▌ | 9715/61904 [4:48:17<18:55:42, 1.31s/it] 16%|█▌ | 9716/61904 [4:48:18<18:55:50, 1.31s/it] 16%|█▌ | 9717/61904 [4:48:20<19:32:37, 1.35s/it] 16%|█▌ | 9718/61904 [4:48:21<19:17:53, 1.33s/it] 16%|█▌ | 9719/61904 [4:48:23<20:38:03, 1.42s/it] 16%|█▌ | 9720/61904 [4:48:24<21:34:15, 1.49s/it] {'loss': 2.8639, 'learning_rate': 1.8457150265785038e-07, 'epoch': 2.51} + 16%|█▌ | 9720/61904 [4:48:24<21:34:15, 1.49s/it] 16%|█▌ | 9721/61904 [4:48:26<20:32:09, 1.42s/it] 16%|█▌ | 9722/61904 [4:48:27<19:58:03, 1.38s/it] 16%|█▌ | 9723/61904 [4:48:28<19:55:19, 1.37s/it] 16%|█▌ | 9724/61904 [4:48:30<19:38:42, 1.36s/it] 16%|█▌ | 9725/61904 [4:48:31<19:53:26, 1.37s/it] 16%|█▌ | 9726/61904 [4:48:33<20:17:48, 1.40s/it] 16%|█▌ | 9727/61904 [4:48:34<20:07:56, 1.39s/it] 16%|█▌ | 9728/61904 [4:48:35<20:02:52, 1.38s/it] 16%|█▌ | 9729/61904 [4:48:37<20:20:53, 1.40s/it] 16%|█▌ | 9730/61904 [4:48:38<19:47:01, 1.37s/it] 16%|█▌ | 9731/61904 [4:48:39<19:36:17, 1.35s/it] 16%|█▌ | 9732/61904 [4:48:41<19:41:05, 1.36s/it] 16%|█▌ | 9733/61904 [4:48:42<20:49:19, 1.44s/it] 16%|█▌ | 9734/61904 [4:48:44<20:41:16, 1.43s/it] 16%|█▌ | 9735/61904 [4:48:45<20:17:40, 1.40s/it] 16%|█▌ | 9736/61904 [4:48:46<20:06:03, 1.39s/it] 16%|█▌ | 9737/61904 [4:48:48<20:44:02, 1.43s/it] 16%|█▌ | 9738/61904 [4:48:49<19:58:41, 1.38s/it] 16%|█▌ | 9739/61904 [4:48:50<19:27:10, 1.34s/it] 16%|█▌ | 9740/61904 [4:48:52<20:06:49, 1.39s/it] {'loss': 2.839, 'learning_rate': 1.8453908984830804e-07, 'epoch': 2.52} + 16%|█▌ | 9740/61904 [4:48:52<20:06:49, 1.39s/it] 16%|█▌ | 9741/61904 [4:48:53<19:45:40, 1.36s/it] 16%|█▌ | 9742/61904 [4:48:55<19:33:33, 1.35s/it] 16%|█▌ | 9743/61904 [4:48:56<19:03:43, 1.32s/it] 16%|█▌ | 9744/61904 [4:48:57<18:53:02, 1.30s/it] 16%|█▌ | 9745/61904 [4:48:59<19:14:50, 1.33s/it] 16%|█▌ | 9746/61904 [4:49:00<19:32:37, 1.35s/it] 16%|█▌ | 9747/61904 [4:49:01<19:24:43, 1.34s/it] 16%|█▌ | 9748/61904 [4:49:03<19:39:30, 1.36s/it] 16%|█▌ | 9749/61904 [4:49:04<20:15:08, 1.40s/it] 16%|█▌ | 9750/61904 [4:49:05<19:54:39, 1.37s/it] 16%|█▌ | 9751/61904 [4:49:07<19:55:08, 1.37s/it] 16%|█▌ | 9752/61904 [4:49:08<20:04:16, 1.39s/it] 16%|█▌ | 9753/61904 [4:49:10<19:55:22, 1.38s/it] 16%|█▌ | 9754/61904 [4:49:11<19:42:25, 1.36s/it] 16%|█▌ | 9755/61904 [4:49:12<19:43:10, 1.36s/it] 16%|█▌ | 9756/61904 [4:49:14<19:31:23, 1.35s/it] 16%|█▌ | 9757/61904 [4:49:15<19:07:09, 1.32s/it] 16%|█▌ | 9758/61904 [4:49:16<19:38:08, 1.36s/it] 16%|█▌ | 9759/61904 [4:49:18<19:30:39, 1.35s/it] 16%|█▌ | 9760/61904 [4:49:19<18:59:00, 1.31s/it] {'loss': 2.8364, 'learning_rate': 1.845066770387657e-07, 'epoch': 2.52} + 16%|█▌ | 9760/61904 [4:49:19<18:59:00, 1.31s/it] 16%|█▌ | 9761/61904 [4:49:20<19:06:29, 1.32s/it] 16%|█▌ | 9762/61904 [4:49:21<18:51:28, 1.30s/it] 16%|█▌ | 9763/61904 [4:49:23<19:06:36, 1.32s/it] 16%|█▌ | 9764/61904 [4:49:24<18:53:26, 1.30s/it] 16%|█▌ | 9765/61904 [4:49:25<19:31:15, 1.35s/it] 16%|█▌ | 9766/61904 [4:49:27<19:27:24, 1.34s/it] 16%|█▌ | 9767/61904 [4:49:28<19:43:11, 1.36s/it] 16%|█▌ | 9768/61904 [4:49:30<19:55:45, 1.38s/it] 16%|█▌ | 9769/61904 [4:49:31<19:57:03, 1.38s/it] 16%|█▌ | 9770/61904 [4:49:32<19:44:55, 1.36s/it] 16%|█▌ | 9771/61904 [4:49:34<19:49:31, 1.37s/it] 16%|█▌ | 9772/61904 [4:49:35<19:47:46, 1.37s/it] 16%|█▌ | 9773/61904 [4:49:37<19:59:35, 1.38s/it] 16%|█▌ | 9774/61904 [4:49:38<19:47:47, 1.37s/it] 16%|█▌ | 9775/61904 [4:49:39<19:10:24, 1.32s/it] 16%|█▌ | 9776/61904 [4:49:40<18:51:16, 1.30s/it] 16%|█▌ | 9777/61904 [4:49:42<19:18:17, 1.33s/it] 16%|█▌ | 9778/61904 [4:49:43<19:58:29, 1.38s/it] 16%|█▌ | 9779/61904 [4:49:45<20:09:39, 1.39s/it] 16%|█▌ | 9780/61904 [4:49:46<19:58:12, 1.38s/it] {'loss': 2.8632, 'learning_rate': 1.844742642292234e-07, 'epoch': 2.53} + 16%|█▌ | 9780/61904 [4:49:46<19:58:12, 1.38s/it] 16%|█▌ | 9781/61904 [4:49:47<19:46:52, 1.37s/it] 16%|█▌ | 9782/61904 [4:49:49<19:40:12, 1.36s/it] 16%|█▌ | 9783/61904 [4:49:50<19:08:20, 1.32s/it] 16%|█▌ | 9784/61904 [4:49:51<19:59:01, 1.38s/it] 16%|█▌ | 9785/61904 [4:49:53<19:38:51, 1.36s/it] 16%|█▌ | 9786/61904 [4:49:54<19:34:21, 1.35s/it] 16%|█▌ | 9787/61904 [4:49:55<19:53:09, 1.37s/it] 16%|█▌ | 9788/61904 [4:49:57<19:40:13, 1.36s/it] 16%|█▌ | 9789/61904 [4:49:58<19:40:07, 1.36s/it] 16%|█▌ | 9790/61904 [4:49:59<19:15:59, 1.33s/it] 16%|█▌ | 9791/61904 [4:50:01<19:25:32, 1.34s/it] 16%|█▌ | 9792/61904 [4:50:02<19:05:25, 1.32s/it] 16%|█▌ | 9793/61904 [4:50:03<19:30:23, 1.35s/it] 16%|█▌ | 9794/61904 [4:50:05<20:11:03, 1.39s/it] 16%|█▌ | 9795/61904 [4:50:06<19:38:11, 1.36s/it] 16%|█▌ | 9796/61904 [4:50:08<20:10:56, 1.39s/it] 16%|█▌ | 9797/61904 [4:50:09<19:49:48, 1.37s/it] 16%|█▌ | 9798/61904 [4:50:10<20:05:20, 1.39s/it] 16%|█▌ | 9799/61904 [4:50:12<19:45:48, 1.37s/it] 16%|█▌ | 9800/61904 [4:50:13<19:56:49, 1.38s/it] {'loss': 2.8573, 'learning_rate': 1.8444185141968105e-07, 'epoch': 2.53} + 16%|█▌ | 9800/61904 [4:50:13<19:56:49, 1.38s/it] 16%|█▌ | 9801/61904 [4:50:15<20:08:50, 1.39s/it] 16%|█▌ | 9802/61904 [4:50:16<20:28:58, 1.42s/it] 16%|█▌ | 9803/61904 [4:50:17<20:05:38, 1.39s/it] 16%|█▌ | 9804/61904 [4:50:19<20:14:45, 1.40s/it] 16%|█▌ | 9805/61904 [4:50:20<19:46:22, 1.37s/it] 16%|█▌ | 9806/61904 [4:50:22<21:12:56, 1.47s/it] 16%|█▌ | 9807/61904 [4:50:23<21:14:04, 1.47s/it] 16%|█▌ | 9808/61904 [4:50:25<21:05:03, 1.46s/it] 16%|█▌ | 9809/61904 [4:50:26<20:29:38, 1.42s/it] 16%|█▌ | 9810/61904 [4:50:27<19:35:04, 1.35s/it] 16%|█▌ | 9811/61904 [4:50:29<19:42:57, 1.36s/it] 16%|█▌ | 9812/61904 [4:50:30<21:17:43, 1.47s/it] 16%|█▌ | 9813/61904 [4:50:32<20:23:42, 1.41s/it] 16%|█▌ | 9814/61904 [4:50:33<19:49:49, 1.37s/it] 16%|█▌ | 9815/61904 [4:50:34<19:40:35, 1.36s/it] 16%|█▌ | 9816/61904 [4:50:36<20:17:46, 1.40s/it] 16%|█▌ | 9817/61904 [4:50:37<20:05:06, 1.39s/it] 16%|█▌ | 9818/61904 [4:50:38<19:45:13, 1.37s/it] 16%|█▌ | 9819/61904 [4:50:40<20:29:50, 1.42s/it] 16%|█▌ | 9820/61904 [4:50:41<20:59:55, 1.45s/it] {'loss': 2.869, 'learning_rate': 1.844094386101387e-07, 'epoch': 2.54} + 16%|█▌ | 9820/61904 [4:50:41<20:59:55, 1.45s/it] 16%|█▌ | 9821/61904 [4:50:43<20:34:10, 1.42s/it] 16%|█▌ | 9822/61904 [4:50:44<20:37:52, 1.43s/it] 16%|█▌ | 9823/61904 [4:50:46<20:39:06, 1.43s/it] 16%|█▌ | 9824/61904 [4:50:47<20:13:14, 1.40s/it] 16%|█▌ | 9825/61904 [4:50:48<20:11:59, 1.40s/it] 16%|█▌ | 9826/61904 [4:50:50<19:53:08, 1.37s/it] 16%|█▌ | 9827/61904 [4:50:51<19:37:47, 1.36s/it] 16%|█▌ | 9828/61904 [4:50:52<19:56:00, 1.38s/it] 16%|█▌ | 9829/61904 [4:50:54<19:38:03, 1.36s/it] 16%|█▌ | 9830/61904 [4:50:55<19:45:47, 1.37s/it] 16%|█▌ | 9831/61904 [4:50:57<19:45:45, 1.37s/it] 16%|█▌ | 9832/61904 [4:50:58<19:55:41, 1.38s/it] 16%|█▌ | 9833/61904 [4:51:00<20:38:05, 1.43s/it] 16%|█▌ | 9834/61904 [4:51:01<20:10:04, 1.39s/it] 16%|█▌ | 9835/61904 [4:51:02<19:48:39, 1.37s/it] 16%|█▌ | 9836/61904 [4:51:04<20:41:21, 1.43s/it] 16%|█▌ | 9837/61904 [4:51:05<19:49:06, 1.37s/it] 16%|█▌ | 9838/61904 [4:51:06<19:59:18, 1.38s/it] 16%|█▌ | 9839/61904 [4:51:08<20:02:27, 1.39s/it] 16%|█▌ | 9840/61904 [4:51:09<19:21:01, 1.34s/it] {'loss': 2.8879, 'learning_rate': 1.843770258005964e-07, 'epoch': 2.54} + 16%|█▌ | 9840/61904 [4:51:09<19:21:01, 1.34s/it] 16%|█▌ | 9841/61904 [4:51:10<18:47:35, 1.30s/it] 16%|█▌ | 9842/61904 [4:51:12<18:56:50, 1.31s/it] 16%|█▌ | 9843/61904 [4:51:13<19:10:27, 1.33s/it] 16%|█▌ | 9844/61904 [4:51:14<18:53:16, 1.31s/it] 16%|█▌ | 9845/61904 [4:51:16<20:44:40, 1.43s/it] 16%|█▌ | 9846/61904 [4:51:17<20:11:27, 1.40s/it] 16%|█▌ | 9847/61904 [4:51:18<19:43:35, 1.36s/it] 16%|█▌ | 9848/61904 [4:51:20<20:00:01, 1.38s/it] 16%|█▌ | 9849/61904 [4:51:21<19:42:13, 1.36s/it] 16%|█▌ | 9850/61904 [4:51:23<19:43:22, 1.36s/it] 16%|█▌ | 9851/61904 [4:51:24<19:58:32, 1.38s/it] 16%|█▌ | 9852/61904 [4:51:25<19:33:52, 1.35s/it] 16%|█▌ | 9853/61904 [4:51:27<19:35:15, 1.35s/it] 16%|█▌ | 9854/61904 [4:51:28<19:30:38, 1.35s/it] 16%|█▌ | 9855/61904 [4:51:29<19:10:25, 1.33s/it] 16%|█▌ | 9856/61904 [4:51:31<19:08:04, 1.32s/it] 16%|█▌ | 9857/61904 [4:51:32<19:29:38, 1.35s/it] 16%|█▌ | 9858/61904 [4:51:33<19:17:12, 1.33s/it] 16%|█▌ | 9859/61904 [4:51:35<19:49:14, 1.37s/it] 16%|█▌ | 9860/61904 [4:51:36<19:37:46, 1.36s/it] {'loss': 2.8887, 'learning_rate': 1.8434461299105406e-07, 'epoch': 2.55} + 16%|█▌ | 9860/61904 [4:51:36<19:37:46, 1.36s/it] 16%|█▌ | 9861/61904 [4:51:37<19:26:03, 1.34s/it] 16%|█▌ | 9862/61904 [4:51:39<19:00:47, 1.32s/it] 16%|█▌ | 9863/61904 [4:51:40<18:36:32, 1.29s/it] 16%|█▌ | 9864/61904 [4:51:41<19:00:22, 1.31s/it] 16%|█▌ | 9865/61904 [4:51:43<19:22:32, 1.34s/it] 16%|█▌ | 9866/61904 [4:51:44<18:34:00, 1.28s/it] 16%|█▌ | 9867/61904 [4:51:45<19:24:37, 1.34s/it] 16%|█▌ | 9868/61904 [4:51:47<19:42:54, 1.36s/it] 16%|█▌ | 9869/61904 [4:51:48<19:48:28, 1.37s/it] 16%|█▌ | 9870/61904 [4:51:50<20:27:10, 1.42s/it] 16%|█▌ | 9871/61904 [4:51:51<19:51:39, 1.37s/it] 16%|█▌ | 9872/61904 [4:51:52<19:28:02, 1.35s/it] 16%|█▌ | 9873/61904 [4:51:54<20:10:05, 1.40s/it] 16%|█▌ | 9874/61904 [4:51:55<19:53:24, 1.38s/it] 16%|█▌ | 9875/61904 [4:51:56<20:07:37, 1.39s/it] 16%|█▌ | 9876/61904 [4:51:58<19:53:32, 1.38s/it] 16%|█▌ | 9877/61904 [4:51:59<20:00:00, 1.38s/it] 16%|█▌ | 9878/61904 [4:52:01<20:15:37, 1.40s/it] 16%|█▌ | 9879/61904 [4:52:02<19:26:45, 1.35s/it] 16%|█▌ | 9880/61904 [4:52:03<18:59:49, 1.31s/it] {'loss': 2.817, 'learning_rate': 1.8431220018151172e-07, 'epoch': 2.55} + 16%|█▌ | 9880/61904 [4:52:03<18:59:49, 1.31s/it] 16%|█▌ | 9881/61904 [4:52:04<18:49:35, 1.30s/it] 16%|█▌ | 9882/61904 [4:52:06<18:41:28, 1.29s/it] 16%|█▌ | 9883/61904 [4:52:07<19:01:05, 1.32s/it] 16%|█▌ | 9884/61904 [4:52:08<19:37:32, 1.36s/it] 16%|█▌ | 9885/61904 [4:52:10<19:49:43, 1.37s/it] 16%|█▌ | 9886/61904 [4:52:11<19:03:43, 1.32s/it] 16%|█▌ | 9887/61904 [4:52:12<19:26:05, 1.35s/it] 16%|█▌ | 9888/61904 [4:52:14<19:51:12, 1.37s/it] 16%|█▌ | 9889/61904 [4:52:15<19:23:16, 1.34s/it] 16%|█▌ | 9890/61904 [4:52:16<19:21:28, 1.34s/it] 16%|█▌ | 9891/61904 [4:52:18<19:33:33, 1.35s/it] 16%|█▌ | 9892/61904 [4:52:19<20:10:50, 1.40s/it] 16%|█▌ | 9893/61904 [4:52:21<20:15:27, 1.40s/it] 16%|█▌ | 9894/61904 [4:52:22<20:01:30, 1.39s/it] 16%|█▌ | 9895/61904 [4:52:23<19:30:48, 1.35s/it] 16%|█▌ | 9896/61904 [4:52:25<19:19:34, 1.34s/it] 16%|█▌ | 9897/61904 [4:52:26<19:27:06, 1.35s/it] 16%|█▌ | 9898/61904 [4:52:27<19:48:15, 1.37s/it] 16%|█▌ | 9899/61904 [4:52:29<19:42:41, 1.36s/it] 16%|█▌ | 9900/61904 [4:52:30<19:33:03, 1.35s/it] {'loss': 2.8758, 'learning_rate': 1.842797873719694e-07, 'epoch': 2.56} + 16%|█▌ | 9900/61904 [4:52:30<19:33:03, 1.35s/it] 16%|█▌ | 9901/61904 [4:52:31<18:56:51, 1.31s/it] 16%|█▌ | 9902/61904 [4:52:33<19:25:04, 1.34s/it] 16%|█▌ | 9903/61904 [4:52:34<19:09:45, 1.33s/it] 16%|█▌ | 9904/61904 [4:52:35<19:15:00, 1.33s/it] 16%|█▌ | 9905/61904 [4:52:37<19:25:33, 1.34s/it] 16%|█▌ | 9906/61904 [4:52:38<19:08:02, 1.32s/it] 16%|█▌ | 9907/61904 [4:52:39<18:58:02, 1.31s/it] 16%|█▌ | 9908/61904 [4:52:41<19:05:59, 1.32s/it] 16%|█▌ | 9909/61904 [4:52:42<19:20:52, 1.34s/it] 16%|█▌ | 9910/61904 [4:52:44<19:59:35, 1.38s/it] 16%|█▌ | 9911/61904 [4:52:45<19:52:00, 1.38s/it] 16%|█▌ | 9912/61904 [4:52:46<19:41:59, 1.36s/it] 16%|█▌ | 9913/61904 [4:52:47<19:07:01, 1.32s/it] 16%|█▌ | 9914/61904 [4:52:49<19:02:47, 1.32s/it] 16%|█▌ | 9915/61904 [4:52:50<19:04:44, 1.32s/it] 16%|█▌ | 9916/61904 [4:52:51<19:03:53, 1.32s/it] 16%|█▌ | 9917/61904 [4:52:53<19:40:59, 1.36s/it] 16%|█▌ | 9918/61904 [4:52:54<19:59:15, 1.38s/it] 16%|█▌ | 9919/61904 [4:52:56<19:35:55, 1.36s/it] 16%|█▌ | 9920/61904 [4:52:57<19:27:56, 1.35s/it] {'loss': 2.8466, 'learning_rate': 1.8424737456242705e-07, 'epoch': 2.56} + 16%|█▌ | 9920/61904 [4:52:57<19:27:56, 1.35s/it] 16%|█▌ | 9921/61904 [4:52:58<18:33:52, 1.29s/it] 16%|█▌ | 9922/61904 [4:52:59<18:58:32, 1.31s/it] 16%|█▌ | 9923/61904 [4:53:01<19:15:57, 1.33s/it] 16%|█▌ | 9924/61904 [4:53:02<18:57:49, 1.31s/it] 16%|█▌ | 9925/61904 [4:53:03<19:00:38, 1.32s/it] 16%|█▌ | 9926/61904 [4:53:05<18:55:45, 1.31s/it] 16%|█▌ | 9927/61904 [4:53:06<19:01:02, 1.32s/it] 16%|█▌ | 9928/61904 [4:53:07<18:54:42, 1.31s/it] 16%|█▌ | 9929/61904 [4:53:09<18:57:58, 1.31s/it] 16%|█▌ | 9930/61904 [4:53:10<19:14:06, 1.33s/it] 16%|█▌ | 9931/61904 [4:53:11<19:05:05, 1.32s/it] 16%|█▌ | 9932/61904 [4:53:13<19:19:46, 1.34s/it] 16%|█▌ | 9933/61904 [4:53:14<19:23:37, 1.34s/it] 16%|█▌ | 9934/61904 [4:53:16<20:25:17, 1.41s/it] 16%|█▌ | 9935/61904 [4:53:17<20:21:55, 1.41s/it] 16%|█▌ | 9936/61904 [4:53:18<19:52:54, 1.38s/it] 16%|█▌ | 9937/61904 [4:53:20<20:08:10, 1.39s/it] 16%|█▌ | 9938/61904 [4:53:21<19:43:55, 1.37s/it] 16%|█▌ | 9939/61904 [4:53:23<20:23:07, 1.41s/it] 16%|█▌ | 9940/61904 [4:53:24<20:02:32, 1.39s/it] {'loss': 2.8203, 'learning_rate': 1.8421496175288474e-07, 'epoch': 2.57} + 16%|█▌ | 9940/61904 [4:53:24<20:02:32, 1.39s/it] 16%|█▌ | 9941/61904 [4:53:25<20:07:07, 1.39s/it] 16%|█▌ | 9942/61904 [4:53:27<20:08:18, 1.40s/it] 16%|█▌ | 9943/61904 [4:53:28<19:42:06, 1.36s/it] 16%|█▌ | 9944/61904 [4:53:29<19:14:38, 1.33s/it] 16%|█▌ | 9945/61904 [4:53:31<19:42:37, 1.37s/it] 16%|█▌ | 9946/61904 [4:53:32<19:59:39, 1.39s/it] 16%|█▌ | 9947/61904 [4:53:34<20:00:31, 1.39s/it] 16%|█▌ | 9948/61904 [4:53:35<19:49:09, 1.37s/it] 16%|█▌ | 9949/61904 [4:53:36<19:22:05, 1.34s/it] 16%|█▌ | 9950/61904 [4:53:38<19:41:51, 1.36s/it] 16%|█▌ | 9951/61904 [4:53:39<19:17:35, 1.34s/it] 16%|█▌ | 9952/61904 [4:53:40<19:25:31, 1.35s/it] 16%|█▌ | 9953/61904 [4:53:42<19:17:55, 1.34s/it] 16%|█▌ | 9954/61904 [4:53:43<19:14:38, 1.33s/it] 16%|█▌ | 9955/61904 [4:53:44<19:18:04, 1.34s/it] 16%|█▌ | 9956/61904 [4:53:46<19:10:43, 1.33s/it] 16%|█▌ | 9957/61904 [4:53:47<19:40:06, 1.36s/it] 16%|█▌ | 9958/61904 [4:53:48<19:19:27, 1.34s/it] 16%|█▌ | 9959/61904 [4:53:50<19:06:31, 1.32s/it] 16%|█▌ | 9960/61904 [4:53:51<19:37:03, 1.36s/it] {'loss': 2.8138, 'learning_rate': 1.841825489433424e-07, 'epoch': 2.57} + 16%|█▌ | 9960/61904 [4:53:51<19:37:03, 1.36s/it] 16%|█▌ | 9961/61904 [4:53:52<19:33:36, 1.36s/it] 16%|█▌ | 9962/61904 [4:53:54<19:37:30, 1.36s/it] 16%|█▌ | 9963/61904 [4:53:55<19:42:13, 1.37s/it] 16%|█▌ | 9964/61904 [4:53:56<19:28:44, 1.35s/it] 16%|█▌ | 9965/61904 [4:53:58<20:21:50, 1.41s/it] 16%|█▌ | 9966/61904 [4:53:59<20:32:52, 1.42s/it] 16%|█▌ | 9967/61904 [4:54:01<20:25:04, 1.42s/it] 16%|█▌ | 9968/61904 [4:54:02<19:50:21, 1.38s/it] 16%|█▌ | 9969/61904 [4:54:04<19:54:28, 1.38s/it] 16%|█▌ | 9970/61904 [4:54:05<19:19:04, 1.34s/it] 16%|█▌ | 9971/61904 [4:54:06<18:59:58, 1.32s/it] 16%|█▌ | 9972/61904 [4:54:07<19:01:03, 1.32s/it] 16%|█▌ | 9973/61904 [4:54:09<20:07:40, 1.40s/it] 16%|█▌ | 9974/61904 [4:54:10<19:46:07, 1.37s/it] 16%|█▌ | 9975/61904 [4:54:12<19:39:39, 1.36s/it] 16%|█▌ | 9976/61904 [4:54:13<19:28:59, 1.35s/it] 16%|█▌ | 9977/61904 [4:54:14<19:21:55, 1.34s/it] 16%|█▌ | 9978/61904 [4:54:16<19:42:38, 1.37s/it] 16%|█▌ | 9979/61904 [4:54:17<20:19:09, 1.41s/it] 16%|█▌ | 9980/61904 [4:54:19<21:13:03, 1.47s/it] {'loss': 2.8538, 'learning_rate': 1.8415013613380006e-07, 'epoch': 2.58} + 16%|█▌ | 9980/61904 [4:54:19<21:13:03, 1.47s/it] 16%|█▌ | 9981/61904 [4:54:20<20:43:30, 1.44s/it] 16%|█▌ | 9982/61904 [4:54:22<21:17:53, 1.48s/it] 16%|█▌ | 9983/61904 [4:54:23<20:03:58, 1.39s/it] 16%|█▌ | 9984/61904 [4:54:24<20:03:51, 1.39s/it] 16%|█▌ | 9985/61904 [4:54:26<20:17:23, 1.41s/it] 16%|█▌ | 9986/61904 [4:54:27<20:19:53, 1.41s/it] 16%|█▌ | 9987/61904 [4:54:29<20:13:21, 1.40s/it] 16%|█▌ | 9988/61904 [4:54:30<19:58:20, 1.38s/it] 16%|█▌ | 9989/61904 [4:54:31<20:14:38, 1.40s/it] 16%|█▌ | 9990/61904 [4:54:33<19:39:53, 1.36s/it] 16%|█▌ | 9991/61904 [4:54:34<19:46:06, 1.37s/it] 16%|█▌ | 9992/61904 [4:54:35<20:05:11, 1.39s/it] 16%|█▌ | 9993/61904 [4:54:37<20:31:17, 1.42s/it] 16%|█▌ | 9994/61904 [4:54:38<20:09:48, 1.40s/it] 16%|█▌ | 9995/61904 [4:54:40<20:33:01, 1.43s/it] 16%|█▌ | 9996/61904 [4:54:41<20:19:21, 1.41s/it] 16%|█▌ | 9997/61904 [4:54:43<20:56:30, 1.45s/it] 16%|█▌ | 9998/61904 [4:54:44<20:18:52, 1.41s/it] 16%|█▌ | 9999/61904 [4:54:45<20:13:51, 1.40s/it] 16%|█▌ | 10000/61904 [4:54:47<20:09:23, 1.40s/it] {'loss': 2.779, 'learning_rate': 1.8411772332425775e-07, 'epoch': 2.58} + 16%|█▌ | 10000/61904 [4:54:47<20:09:23, 1.40s/it] 16%|█▌ | 10001/61904 [4:54:48<19:55:26, 1.38s/it] 16%|█▌ | 10002/61904 [4:54:49<19:49:16, 1.37s/it] 16%|█▌ | 10003/61904 [4:54:51<18:56:52, 1.31s/it] 16%|█▌ | 10004/61904 [4:54:52<18:52:15, 1.31s/it] 16%|█▌ | 10005/61904 [4:54:53<19:32:44, 1.36s/it] 16%|█▌ | 10006/61904 [4:54:55<19:33:36, 1.36s/it] 16%|█▌ | 10007/61904 [4:54:56<20:04:12, 1.39s/it] 16%|█▌ | 10008/61904 [4:54:58<20:09:21, 1.40s/it] 16%|█▌ | 10009/61904 [4:54:59<20:46:46, 1.44s/it] 16%|█▌ | 10010/61904 [4:55:01<20:21:48, 1.41s/it] 16%|█▌ | 10011/61904 [4:55:02<20:24:27, 1.42s/it] 16%|█▌ | 10012/61904 [4:55:03<20:42:09, 1.44s/it] 16%|█▌ | 10013/61904 [4:55:05<20:22:17, 1.41s/it] 16%|█▌ | 10014/61904 [4:55:06<20:09:15, 1.40s/it] 16%|█▌ | 10015/61904 [4:55:08<20:42:30, 1.44s/it] 16%|█▌ | 10016/61904 [4:55:09<20:56:13, 1.45s/it] 16%|█▌ | 10017/61904 [4:55:10<19:57:51, 1.39s/it] 16%|█▌ | 10018/61904 [4:55:12<19:47:47, 1.37s/it] 16%|█▌ | 10019/61904 [4:55:13<20:12:25, 1.40s/it] 16%|█▌ | 10020/61904 [4:55:15<20:02:26, 1.39s/it] {'loss': 2.8533, 'learning_rate': 1.840853105147154e-07, 'epoch': 2.59} + 16%|█▌ | 10020/61904 [4:55:15<20:02:26, 1.39s/it] 16%|█▌ | 10021/61904 [4:55:16<20:20:33, 1.41s/it] 16%|█▌ | 10022/61904 [4:55:17<20:14:20, 1.40s/it] 16%|█▌ | 10023/61904 [4:55:19<19:58:14, 1.39s/it] 16%|█▌ | 10024/61904 [4:55:20<20:10:59, 1.40s/it] 16%|█▌ | 10025/61904 [4:55:22<19:58:18, 1.39s/it] 16%|█▌ | 10026/61904 [4:55:23<19:34:06, 1.36s/it] 16%|█▌ | 10027/61904 [4:55:24<19:09:40, 1.33s/it] 16%|█▌ | 10028/61904 [4:55:26<19:49:09, 1.38s/it] 16%|█▌ | 10029/61904 [4:55:27<19:42:52, 1.37s/it] 16%|█▌ | 10030/61904 [4:55:28<19:44:43, 1.37s/it] 16%|█▌ | 10031/61904 [4:55:30<19:40:10, 1.37s/it] 16%|█▌ | 10032/61904 [4:55:31<19:29:12, 1.35s/it] 16%|█▌ | 10033/61904 [4:55:33<20:11:55, 1.40s/it] 16%|█▌ | 10034/61904 [4:55:34<19:59:57, 1.39s/it] 16%|█▌ | 10035/61904 [4:55:35<19:28:31, 1.35s/it] 16%|█▌ | 10036/61904 [4:55:37<19:34:12, 1.36s/it] 16%|█▌ | 10037/61904 [4:55:38<19:31:52, 1.36s/it] 16%|█▌ | 10038/61904 [4:55:39<19:15:19, 1.34s/it] 16%|█▌ | 10039/61904 [4:55:41<19:40:58, 1.37s/it] 16%|█▌ | 10040/61904 [4:55:42<20:08:15, 1.40s/it] {'loss': 2.8389, 'learning_rate': 1.8405289770517307e-07, 'epoch': 2.59} + 16%|█▌ | 10040/61904 [4:55:42<20:08:15, 1.40s/it] 16%|█▌ | 10041/61904 [4:55:44<20:46:20, 1.44s/it] 16%|█▌ | 10042/61904 [4:55:45<20:32:40, 1.43s/it] 16%|█▌ | 10043/61904 [4:55:46<20:34:37, 1.43s/it] 16%|█▌ | 10044/61904 [4:55:48<20:01:34, 1.39s/it] 16%|█▌ | 10045/61904 [4:55:49<20:13:34, 1.40s/it] 16%|█▌ | 10046/61904 [4:55:50<19:41:48, 1.37s/it] 16%|█▌ | 10047/61904 [4:55:52<18:56:21, 1.31s/it] 16%|█▌ | 10048/61904 [4:55:53<19:21:22, 1.34s/it] 16%|█▌ | 10049/61904 [4:55:54<19:22:48, 1.35s/it] 16%|█▌ | 10050/61904 [4:55:56<19:19:07, 1.34s/it] 16%|█▌ | 10051/61904 [4:55:57<19:55:09, 1.38s/it] 16%|█▌ | 10052/61904 [4:55:59<19:46:01, 1.37s/it] 16%|█▌ | 10053/61904 [4:56:00<19:54:07, 1.38s/it] 16%|█▌ | 10054/61904 [4:56:01<19:58:52, 1.39s/it] 16%|█▌ | 10055/61904 [4:56:03<20:20:43, 1.41s/it] 16%|█▌ | 10056/61904 [4:56:04<19:52:15, 1.38s/it] 16%|█▌ | 10057/61904 [4:56:05<19:31:35, 1.36s/it] 16%|█▌ | 10058/61904 [4:56:07<19:41:31, 1.37s/it] 16%|█▌ | 10059/61904 [4:56:08<19:48:36, 1.38s/it] 16%|█▋ | 10060/61904 [4:56:09<19:20:57, 1.34s/it] {'loss': 2.9132, 'learning_rate': 1.8402048489563076e-07, 'epoch': 2.6} + 16%|█▋ | 10060/61904 [4:56:09<19:20:57, 1.34s/it] 16%|█▋ | 10061/61904 [4:56:11<19:21:58, 1.34s/it] 16%|█▋ | 10062/61904 [4:56:12<19:23:43, 1.35s/it] 16%|█▋ | 10063/61904 [4:56:14<19:51:56, 1.38s/it] 16%|█▋ | 10064/61904 [4:56:15<19:37:41, 1.36s/it] 16%|█▋ | 10065/61904 [4:56:16<19:23:28, 1.35s/it] 16%|█▋ | 10066/61904 [4:56:18<20:08:30, 1.40s/it] 16%|█▋ | 10067/61904 [4:56:19<20:35:07, 1.43s/it] 16%|█▋ | 10068/61904 [4:56:21<20:25:57, 1.42s/it] 16%|█▋ | 10069/61904 [4:56:22<20:04:09, 1.39s/it] 16%|█▋ | 10070/61904 [4:56:23<19:56:02, 1.38s/it] 16%|█▋ | 10071/61904 [4:56:25<20:17:31, 1.41s/it] 16%|█▋ | 10072/61904 [4:56:26<19:25:52, 1.35s/it] 16%|█▋ | 10073/61904 [4:56:27<19:15:36, 1.34s/it] 16%|█▋ | 10074/61904 [4:56:29<19:15:18, 1.34s/it] 16%|█▋ | 10075/61904 [4:56:30<18:53:49, 1.31s/it] 16%|█▋ | 10076/61904 [4:56:31<19:08:22, 1.33s/it] 16%|█▋ | 10077/61904 [4:56:33<19:17:35, 1.34s/it] 16%|█▋ | 10078/61904 [4:56:34<19:00:44, 1.32s/it] 16%|█▋ | 10079/61904 [4:56:35<19:12:14, 1.33s/it] 16%|█▋ | 10080/61904 [4:56:37<21:18:27, 1.48s/it] {'loss': 2.8213, 'learning_rate': 1.8398807208608842e-07, 'epoch': 2.6} + 16%|█▋ | 10080/61904 [4:56:37<21:18:27, 1.48s/it] 16%|█▋ | 10081/61904 [4:56:38<20:21:42, 1.41s/it] 16%|█▋ | 10082/61904 [4:56:40<20:06:05, 1.40s/it] 16%|█▋ | 10083/61904 [4:56:41<20:10:57, 1.40s/it] 16%|█▋ | 10084/61904 [4:56:43<20:17:30, 1.41s/it] 16%|█▋ | 10085/61904 [4:56:44<19:54:43, 1.38s/it] 16%|█▋ | 10086/61904 [4:56:45<19:41:53, 1.37s/it] 16%|█▋ | 10087/61904 [4:56:47<19:49:55, 1.38s/it] 16%|█▋ | 10088/61904 [4:56:48<19:55:17, 1.38s/it] 16%|█▋ | 10089/61904 [4:56:50<20:18:46, 1.41s/it] 16%|█▋ | 10090/61904 [4:56:51<20:06:24, 1.40s/it] 16%|█▋ | 10091/61904 [4:56:52<20:49:30, 1.45s/it] 16%|█▋ | 10092/61904 [4:56:54<20:37:16, 1.43s/it] 16%|█▋ | 10093/61904 [4:56:55<20:10:35, 1.40s/it] 16%|█▋ | 10094/61904 [4:56:57<20:35:30, 1.43s/it] 16%|█▋ | 10095/61904 [4:56:58<20:13:56, 1.41s/it] 16%|█▋ | 10096/61904 [4:56:59<20:19:21, 1.41s/it] 16%|█▋ | 10097/61904 [4:57:01<20:09:33, 1.40s/it] 16%|█▋ | 10098/61904 [4:57:02<20:25:49, 1.42s/it] 16%|█▋ | 10099/61904 [4:57:04<20:40:53, 1.44s/it] 16%|█▋ | 10100/61904 [4:57:05<20:20:32, 1.41s/it] {'loss': 2.7963, 'learning_rate': 1.8395565927654608e-07, 'epoch': 2.61} + 16%|█▋ | 10100/61904 [4:57:05<20:20:32, 1.41s/it] 16%|█▋ | 10101/61904 [4:57:07<20:56:48, 1.46s/it] 16%|█▋ | 10102/61904 [4:57:08<21:13:03, 1.47s/it] 16%|█▋ | 10103/61904 [4:57:10<20:56:19, 1.46s/it] 16%|█▋ | 10104/61904 [4:57:11<21:30:29, 1.49s/it] 16%|█▋ | 10105/61904 [4:57:13<21:42:04, 1.51s/it] 16%|█▋ | 10106/61904 [4:57:14<21:26:21, 1.49s/it] 16%|█▋ | 10107/61904 [4:57:15<20:34:27, 1.43s/it] 16%|█▋ | 10108/61904 [4:57:17<20:13:27, 1.41s/it] 16%|█▋ | 10109/61904 [4:57:18<20:29:43, 1.42s/it] 16%|█▋ | 10110/61904 [4:57:20<20:04:36, 1.40s/it] 16%|█▋ | 10111/61904 [4:57:21<19:44:20, 1.37s/it] 16%|█▋ | 10112/61904 [4:57:22<19:33:42, 1.36s/it] 16%|█▋ | 10113/61904 [4:57:24<19:10:53, 1.33s/it] 16%|█▋ | 10114/61904 [4:57:25<19:19:17, 1.34s/it] 16%|█▋ | 10115/61904 [4:57:26<19:24:44, 1.35s/it] 16%|█▋ | 10116/61904 [4:57:28<19:21:03, 1.35s/it] 16%|█▋ | 10117/61904 [4:57:29<18:50:32, 1.31s/it] 16%|█▋ | 10118/61904 [4:57:30<18:59:03, 1.32s/it] 16%|█▋ | 10119/61904 [4:57:32<20:00:05, 1.39s/it] 16%|█▋ | 10120/61904 [4:57:33<19:35:53, 1.36s/it] {'loss': 2.7789, 'learning_rate': 1.8392324646700374e-07, 'epoch': 2.62} + 16%|█▋ | 10120/61904 [4:57:33<19:35:53, 1.36s/it] 16%|█▋ | 10121/61904 [4:57:34<19:22:00, 1.35s/it] 16%|█▋ | 10122/61904 [4:57:36<19:38:32, 1.37s/it] 16%|█▋ | 10123/61904 [4:57:37<18:56:31, 1.32s/it] 16%|█▋ | 10124/61904 [4:57:38<19:29:41, 1.36s/it] 16%|█▋ | 10125/61904 [4:57:40<20:41:15, 1.44s/it] 16%|█▋ | 10126/61904 [4:57:41<20:21:16, 1.42s/it] 16%|█▋ | 10127/61904 [4:57:43<20:16:23, 1.41s/it] 16%|█▋ | 10128/61904 [4:57:44<19:27:10, 1.35s/it] 16%|█▋ | 10129/61904 [4:57:46<20:07:57, 1.40s/it] 16%|█▋ | 10130/61904 [4:57:47<19:21:19, 1.35s/it] 16%|█▋ | 10131/61904 [4:57:48<19:37:01, 1.36s/it] 16%|█▋ | 10132/61904 [4:57:50<19:48:10, 1.38s/it] 16%|█▋ | 10133/61904 [4:57:51<19:37:17, 1.36s/it] 16%|█▋ | 10134/61904 [4:57:52<19:30:32, 1.36s/it] 16%|█▋ | 10135/61904 [4:57:53<19:01:15, 1.32s/it] 16%|█▋ | 10136/61904 [4:57:55<19:05:27, 1.33s/it] 16%|█▋ | 10137/61904 [4:57:56<18:48:56, 1.31s/it] 16%|█▋ | 10138/61904 [4:57:57<18:56:47, 1.32s/it] 16%|█▋ | 10139/61904 [4:57:59<19:31:04, 1.36s/it] 16%|█▋ | 10140/61904 [4:58:00<19:22:49, 1.35s/it] {'loss': 2.8891, 'learning_rate': 1.838908336574614e-07, 'epoch': 2.62} + 16%|█▋ | 10140/61904 [4:58:00<19:22:49, 1.35s/it] 16%|█▋ | 10141/61904 [4:58:01<18:58:55, 1.32s/it] 16%|█▋ | 10142/61904 [4:58:03<18:49:43, 1.31s/it] 16%|█▋ | 10143/61904 [4:58:04<18:52:24, 1.31s/it] 16%|█▋ | 10144/61904 [4:58:06<19:26:29, 1.35s/it] 16%|█▋ | 10145/61904 [4:58:07<19:26:50, 1.35s/it] 16%|█▋ | 10146/61904 [4:58:08<19:28:35, 1.35s/it] 16%|█▋ | 10147/61904 [4:58:09<19:01:06, 1.32s/it] 16%|█▋ | 10148/61904 [4:58:11<19:03:16, 1.33s/it] 16%|█▋ | 10149/61904 [4:58:12<18:34:24, 1.29s/it] 16%|█▋ | 10150/61904 [4:58:13<18:46:37, 1.31s/it] 16%|█▋ | 10151/61904 [4:58:15<19:16:29, 1.34s/it] 16%|█▋ | 10152/61904 [4:58:16<19:16:26, 1.34s/it] 16%|█▋ | 10153/61904 [4:58:18<19:48:58, 1.38s/it] 16%|█▋ | 10154/61904 [4:58:19<19:21:46, 1.35s/it] 16%|█▋ | 10155/61904 [4:58:20<19:56:09, 1.39s/it] 16%|█▋ | 10156/61904 [4:58:22<20:13:15, 1.41s/it] 16%|█▋ | 10157/61904 [4:58:23<19:42:02, 1.37s/it] 16%|█▋ | 10158/61904 [4:58:24<19:20:10, 1.35s/it] 16%|█▋ | 10159/61904 [4:58:26<19:15:44, 1.34s/it] 16%|█▋ | 10160/61904 [4:58:27<20:14:03, 1.41s/it] {'loss': 2.8305, 'learning_rate': 1.838584208479191e-07, 'epoch': 2.63} + 16%|█▋ | 10160/61904 [4:58:27<20:14:03, 1.41s/it] 16%|█▋ | 10161/61904 [4:58:29<19:44:20, 1.37s/it] 16%|█▋ | 10162/61904 [4:58:30<19:10:44, 1.33s/it] 16%|█▋ | 10163/61904 [4:58:31<19:15:32, 1.34s/it] 16%|█▋ | 10164/61904 [4:58:32<18:55:31, 1.32s/it] 16%|█▋ | 10165/61904 [4:58:34<19:10:09, 1.33s/it] 16%|█▋ | 10166/61904 [4:58:35<19:43:19, 1.37s/it] 16%|█▋ | 10167/61904 [4:58:37<19:46:46, 1.38s/it] 16%|█▋ | 10168/61904 [4:58:38<19:38:41, 1.37s/it] 16%|█▋ | 10169/61904 [4:58:39<19:09:23, 1.33s/it] 16%|█▋ | 10170/61904 [4:58:40<18:50:31, 1.31s/it] 16%|█▋ | 10171/61904 [4:58:42<19:16:45, 1.34s/it] 16%|█▋ | 10172/61904 [4:58:43<19:06:44, 1.33s/it] 16%|█▋ | 10173/61904 [4:58:44<18:36:48, 1.30s/it] 16%|█▋ | 10174/61904 [4:58:46<18:53:11, 1.31s/it] 16%|█▋ | 10175/61904 [4:58:47<19:07:05, 1.33s/it] 16%|█▋ | 10176/61904 [4:58:48<18:35:52, 1.29s/it] 16%|█▋ | 10177/61904 [4:58:50<20:24:44, 1.42s/it] 16%|█▋ | 10178/61904 [4:58:51<19:32:07, 1.36s/it] 16%|█▋ | 10179/61904 [4:58:53<19:42:50, 1.37s/it] 16%|█▋ | 10180/61904 [4:58:54<19:27:58, 1.35s/it] {'loss': 2.8333, 'learning_rate': 1.8382600803837676e-07, 'epoch': 2.63} + 16%|█▋ | 10180/61904 [4:58:54<19:27:58, 1.35s/it] 16%|█▋ | 10181/61904 [4:58:55<19:15:27, 1.34s/it] 16%|█▋ | 10182/61904 [4:58:57<19:34:00, 1.36s/it] 16%|█▋ | 10183/61904 [4:58:58<19:40:37, 1.37s/it] 16%|█▋ | 10184/61904 [4:59:00<20:14:39, 1.41s/it] 16%|█▋ | 10185/61904 [4:59:01<20:06:34, 1.40s/it] 16%|█▋ | 10186/61904 [4:59:02<19:41:43, 1.37s/it] 16%|█▋ | 10187/61904 [4:59:04<19:02:10, 1.33s/it] 16%|█▋ | 10188/61904 [4:59:05<19:19:47, 1.35s/it] 16%|█▋ | 10189/61904 [4:59:06<19:26:31, 1.35s/it] 16%|█▋ | 10190/61904 [4:59:08<19:13:34, 1.34s/it] 16%|█▋ | 10191/61904 [4:59:09<20:06:57, 1.40s/it] 16%|█▋ | 10192/61904 [4:59:10<19:40:22, 1.37s/it] 16%|█▋ | 10193/61904 [4:59:12<18:51:42, 1.31s/it] 16%|█▋ | 10194/61904 [4:59:13<18:44:29, 1.30s/it] 16%|█▋ | 10195/61904 [4:59:14<18:49:49, 1.31s/it] 16%|█▋ | 10196/61904 [4:59:16<20:15:06, 1.41s/it] 16%|█▋ | 10197/61904 [4:59:17<20:15:54, 1.41s/it] 16%|█▋ | 10198/61904 [4:59:19<19:58:23, 1.39s/it] 16%|█▋ | 10199/61904 [4:59:20<19:48:02, 1.38s/it] 16%|█▋ | 10200/61904 [4:59:21<19:35:46, 1.36s/it] {'loss': 2.8771, 'learning_rate': 1.8379359522883442e-07, 'epoch': 2.64} + 16%|█▋ | 10200/61904 [4:59:21<19:35:46, 1.36s/it] 16%|█▋ | 10201/61904 [4:59:23<19:42:39, 1.37s/it] 16%|█▋ | 10202/61904 [4:59:24<20:00:45, 1.39s/it] 16%|█▋ | 10203/61904 [4:59:25<19:52:55, 1.38s/it] 16%|█▋ | 10204/61904 [4:59:27<20:17:56, 1.41s/it] 16%|█▋ | 10205/61904 [4:59:28<19:37:20, 1.37s/it] 16%|█▋ | 10206/61904 [4:59:30<19:28:04, 1.36s/it] 16%|█▋ | 10207/61904 [4:59:31<19:44:16, 1.37s/it] 16%|█▋ | 10208/61904 [4:59:32<19:07:48, 1.33s/it] 16%|█▋ | 10209/61904 [4:59:33<18:54:57, 1.32s/it] 16%|█▋ | 10210/61904 [4:59:35<18:41:27, 1.30s/it] 16%|█▋ | 10211/61904 [4:59:36<18:38:14, 1.30s/it] 16%|█▋ | 10212/61904 [4:59:37<18:50:39, 1.31s/it] 16%|█▋ | 10213/61904 [4:59:39<19:25:42, 1.35s/it] 16%|█▋ | 10214/61904 [4:59:40<19:31:41, 1.36s/it] 17%|█▋ | 10215/61904 [4:59:42<19:24:22, 1.35s/it] 17%|█▋ | 10216/61904 [4:59:43<19:30:35, 1.36s/it] 17%|█▋ | 10217/61904 [4:59:44<19:12:13, 1.34s/it] 17%|█▋ | 10218/61904 [4:59:46<19:06:05, 1.33s/it] 17%|█▋ | 10219/61904 [4:59:47<20:06:07, 1.40s/it] 17%|█▋ | 10220/61904 [4:59:48<19:50:33, 1.38s/it] {'loss': 2.8126, 'learning_rate': 1.837611824192921e-07, 'epoch': 2.64} + 17%|█▋ | 10220/61904 [4:59:48<19:50:33, 1.38s/it] 17%|█▋ | 10221/61904 [4:59:50<19:05:09, 1.33s/it] 17%|█▋ | 10222/61904 [4:59:51<19:24:13, 1.35s/it] 17%|█▋ | 10223/61904 [4:59:52<19:31:56, 1.36s/it] 17%|█▋ | 10224/61904 [4:59:54<20:33:05, 1.43s/it] 17%|█▋ | 10225/61904 [4:59:55<20:42:25, 1.44s/it] 17%|█▋ | 10226/61904 [4:59:57<20:14:53, 1.41s/it] 17%|█▋ | 10227/61904 [4:59:58<20:12:23, 1.41s/it] 17%|█▋ | 10228/61904 [5:00:00<19:47:09, 1.38s/it] 17%|█▋ | 10229/61904 [5:00:01<19:31:16, 1.36s/it] 17%|█▋ | 10230/61904 [5:00:02<19:14:35, 1.34s/it] 17%|█▋ | 10231/61904 [5:00:04<19:24:08, 1.35s/it] 17%|█▋ | 10232/61904 [5:00:05<19:50:12, 1.38s/it] 17%|█▋ | 10233/61904 [5:00:06<19:44:08, 1.38s/it] 17%|█▋ | 10234/61904 [5:00:08<19:18:04, 1.34s/it] 17%|█▋ | 10235/61904 [5:00:09<19:25:38, 1.35s/it] 17%|█▋ | 10236/61904 [5:00:10<19:21:47, 1.35s/it] 17%|█▋ | 10237/61904 [5:00:12<19:55:42, 1.39s/it] 17%|█▋ | 10238/61904 [5:00:13<20:14:43, 1.41s/it] 17%|█▋ | 10239/61904 [5:00:15<19:52:37, 1.39s/it] 17%|█▋ | 10240/61904 [5:00:16<20:10:24, 1.41s/it] {'loss': 2.8727, 'learning_rate': 1.8372876960974977e-07, 'epoch': 2.65} + 17%|█▋ | 10240/61904 [5:00:16<20:10:24, 1.41s/it] 17%|█▋ | 10241/61904 [5:00:17<20:10:36, 1.41s/it] 17%|█▋ | 10242/61904 [5:00:19<19:51:03, 1.38s/it] 17%|█▋ | 10243/61904 [5:00:20<19:41:15, 1.37s/it] 17%|█▋ | 10244/61904 [5:00:21<19:36:28, 1.37s/it] 17%|█▋ | 10245/61904 [5:00:23<19:52:18, 1.38s/it] 17%|█▋ | 10246/61904 [5:00:24<19:37:12, 1.37s/it] 17%|█▋ | 10247/61904 [5:00:26<20:11:16, 1.41s/it] 17%|█▋ | 10248/61904 [5:00:27<19:46:28, 1.38s/it] 17%|█▋ | 10249/61904 [5:00:28<19:45:04, 1.38s/it] 17%|█▋ | 10250/61904 [5:00:30<19:26:05, 1.35s/it] 17%|█▋ | 10251/61904 [5:00:31<19:17:23, 1.34s/it] 17%|█▋ | 10252/61904 [5:00:32<19:25:10, 1.35s/it] 17%|█▋ | 10253/61904 [5:00:34<18:52:29, 1.32s/it] 17%|█▋ | 10254/61904 [5:00:35<19:15:54, 1.34s/it] 17%|█▋ | 10255/61904 [5:00:36<19:26:09, 1.35s/it] 17%|█▋ | 10256/61904 [5:00:38<19:34:04, 1.36s/it] 17%|█▋ | 10257/61904 [5:00:39<19:18:28, 1.35s/it] 17%|█▋ | 10258/61904 [5:00:40<19:23:11, 1.35s/it] 17%|█▋ | 10259/61904 [5:00:42<19:49:54, 1.38s/it] 17%|█▋ | 10260/61904 [5:00:43<19:39:32, 1.37s/it] {'loss': 2.8323, 'learning_rate': 1.8369635680020743e-07, 'epoch': 2.65} + 17%|█▋ | 10260/61904 [5:00:43<19:39:32, 1.37s/it] 17%|█▋ | 10261/61904 [5:00:45<19:41:25, 1.37s/it] 17%|█▋ | 10262/61904 [5:00:46<19:15:00, 1.34s/it] 17%|█▋ | 10263/61904 [5:00:47<19:57:37, 1.39s/it] 17%|█▋ | 10264/61904 [5:00:49<19:55:25, 1.39s/it] 17%|█▋ | 10265/61904 [5:00:50<19:43:27, 1.38s/it] 17%|█▋ | 10266/61904 [5:00:52<20:22:02, 1.42s/it] 17%|█▋ | 10267/61904 [5:00:53<19:56:00, 1.39s/it] 17%|█▋ | 10268/61904 [5:00:54<19:58:14, 1.39s/it] 17%|█▋ | 10269/61904 [5:00:56<19:22:42, 1.35s/it] 17%|█▋ | 10270/61904 [5:00:57<19:18:12, 1.35s/it] 17%|█▋ | 10271/61904 [5:00:58<19:21:09, 1.35s/it] 17%|█▋ | 10272/61904 [5:01:00<19:36:53, 1.37s/it] 17%|█▋ | 10273/61904 [5:01:01<19:38:11, 1.37s/it] 17%|█▋ | 10274/61904 [5:01:03<20:01:32, 1.40s/it] 17%|█▋ | 10275/61904 [5:01:04<19:54:49, 1.39s/it] 17%|█▋ | 10276/61904 [5:01:05<19:51:02, 1.38s/it] 17%|█▋ | 10277/61904 [5:01:07<19:38:14, 1.37s/it] 17%|█▋ | 10278/61904 [5:01:08<19:23:58, 1.35s/it] 17%|█▋ | 10279/61904 [5:01:09<19:21:49, 1.35s/it] 17%|█▋ | 10280/61904 [5:01:11<19:01:40, 1.33s/it] {'loss': 2.7787, 'learning_rate': 1.8366394399066512e-07, 'epoch': 2.66} + 17%|█▋ | 10280/61904 [5:01:11<19:01:40, 1.33s/it] 17%|█▋ | 10281/61904 [5:01:12<19:40:15, 1.37s/it] 17%|█▋ | 10282/61904 [5:01:13<19:15:39, 1.34s/it] 17%|█▋ | 10283/61904 [5:01:15<19:05:04, 1.33s/it] 17%|█▋ | 10284/61904 [5:01:16<20:07:18, 1.40s/it] 17%|█▋ | 10285/61904 [5:01:18<19:33:47, 1.36s/it] 17%|█▋ | 10286/61904 [5:01:19<19:12:04, 1.34s/it] 17%|█▋ | 10287/61904 [5:01:20<18:44:04, 1.31s/it] 17%|█▋ | 10288/61904 [5:01:21<18:47:48, 1.31s/it] 17%|█▋ | 10289/61904 [5:01:23<19:24:44, 1.35s/it] 17%|█▋ | 10290/61904 [5:01:24<19:01:55, 1.33s/it] 17%|█▋ | 10291/61904 [5:01:25<19:09:58, 1.34s/it] 17%|█▋ | 10292/61904 [5:01:27<18:52:12, 1.32s/it] 17%|█▋ | 10293/61904 [5:01:28<18:54:07, 1.32s/it] 17%|█▋ | 10294/61904 [5:01:30<20:02:36, 1.40s/it] 17%|█▋ | 10295/61904 [5:01:31<20:28:00, 1.43s/it] 17%|█▋ | 10296/61904 [5:01:33<20:24:27, 1.42s/it] 17%|█▋ | 10297/61904 [5:01:34<20:21:04, 1.42s/it] 17%|█▋ | 10298/61904 [5:01:35<19:42:20, 1.37s/it] 17%|█▋ | 10299/61904 [5:01:36<19:19:43, 1.35s/it] 17%|█▋ | 10300/61904 [5:01:38<19:01:38, 1.33s/it] {'loss': 2.8147, 'learning_rate': 1.8363153118112275e-07, 'epoch': 2.66} + 17%|█▋ | 10300/61904 [5:01:38<19:01:38, 1.33s/it] 17%|█▋ | 10301/61904 [5:01:39<18:49:10, 1.31s/it] 17%|█▋ | 10302/61904 [5:01:40<19:07:35, 1.33s/it] 17%|█▋ | 10303/61904 [5:01:42<20:03:10, 1.40s/it] 17%|█▋ | 10304/61904 [5:01:43<20:06:05, 1.40s/it] 17%|█▋ | 10305/61904 [5:01:45<20:31:36, 1.43s/it] 17%|█▋ | 10306/61904 [5:01:46<20:05:26, 1.40s/it] 17%|█▋ | 10307/61904 [5:01:48<19:40:40, 1.37s/it] 17%|█▋ | 10308/61904 [5:01:49<19:51:45, 1.39s/it] 17%|█▋ | 10309/61904 [5:01:50<20:29:07, 1.43s/it] 17%|█▋ | 10310/61904 [5:01:52<19:49:00, 1.38s/it] 17%|█▋ | 10311/61904 [5:01:53<19:59:55, 1.40s/it] 17%|█▋ | 10312/61904 [5:01:55<20:14:36, 1.41s/it] 17%|█▋ | 10313/61904 [5:01:56<20:33:42, 1.43s/it] 17%|█▋ | 10314/61904 [5:01:57<20:21:53, 1.42s/it] 17%|█▋ | 10315/61904 [5:01:59<19:39:07, 1.37s/it] 17%|█▋ | 10316/61904 [5:02:00<19:40:47, 1.37s/it] 17%|█▋ | 10317/61904 [5:02:02<20:11:54, 1.41s/it] 17%|█▋ | 10318/61904 [5:02:03<19:37:15, 1.37s/it] 17%|█▋ | 10319/61904 [5:02:04<19:23:38, 1.35s/it] 17%|█▋ | 10320/61904 [5:02:06<19:13:46, 1.34s/it] {'loss': 2.7373, 'learning_rate': 1.8359911837158044e-07, 'epoch': 2.67} + 17%|█▋ | 10320/61904 [5:02:06<19:13:46, 1.34s/it] 17%|█▋ | 10321/61904 [5:02:07<19:14:55, 1.34s/it] 17%|█▋ | 10322/61904 [5:02:08<18:44:42, 1.31s/it] 17%|█▋ | 10323/61904 [5:02:09<19:03:39, 1.33s/it] 17%|█▋ | 10324/61904 [5:02:11<19:38:33, 1.37s/it] 17%|█▋ | 10325/61904 [5:02:12<19:24:30, 1.35s/it] 17%|█▋ | 10326/61904 [5:02:14<19:40:00, 1.37s/it] 17%|█▋ | 10327/61904 [5:02:15<19:50:35, 1.39s/it] 17%|█▋ | 10328/61904 [5:02:17<20:18:44, 1.42s/it] 17%|█▋ | 10329/61904 [5:02:18<20:09:15, 1.41s/it] 17%|█▋ | 10330/61904 [5:02:19<19:58:07, 1.39s/it] 17%|█▋ | 10331/61904 [5:02:21<20:10:36, 1.41s/it] 17%|█▋ | 10332/61904 [5:02:22<20:32:30, 1.43s/it] 17%|█▋ | 10333/61904 [5:02:24<20:22:10, 1.42s/it] 17%|█▋ | 10334/61904 [5:02:25<19:57:28, 1.39s/it] 17%|█▋ | 10335/61904 [5:02:26<19:23:34, 1.35s/it] 17%|█▋ | 10336/61904 [5:02:28<19:15:38, 1.34s/it] 17%|█▋ | 10337/61904 [5:02:29<19:10:37, 1.34s/it] 17%|█▋ | 10338/61904 [5:02:30<19:57:37, 1.39s/it] 17%|█▋ | 10339/61904 [5:02:32<20:16:44, 1.42s/it] 17%|█▋ | 10340/61904 [5:02:33<19:59:21, 1.40s/it] {'loss': 2.8365, 'learning_rate': 1.835667055620381e-07, 'epoch': 2.67} + 17%|█▋ | 10340/61904 [5:02:33<19:59:21, 1.40s/it] 17%|█▋ | 10341/61904 [5:02:35<20:23:33, 1.42s/it] 17%|█▋ | 10342/61904 [5:02:36<20:25:25, 1.43s/it] 17%|█▋ | 10343/61904 [5:02:37<20:02:53, 1.40s/it] 17%|█▋ | 10344/61904 [5:02:39<19:53:20, 1.39s/it] 17%|█▋ | 10345/61904 [5:02:40<19:58:50, 1.40s/it] 17%|█▋ | 10346/61904 [5:02:42<20:17:08, 1.42s/it] 17%|█▋ | 10347/61904 [5:02:43<20:27:00, 1.43s/it] 17%|█▋ | 10348/61904 [5:02:45<20:17:30, 1.42s/it] 17%|█▋ | 10349/61904 [5:02:46<20:09:34, 1.41s/it] 17%|█▋ | 10350/61904 [5:02:47<19:45:14, 1.38s/it] 17%|█▋ | 10351/61904 [5:02:49<19:30:49, 1.36s/it] 17%|█▋ | 10352/61904 [5:02:50<19:20:07, 1.35s/it] 17%|█▋ | 10353/61904 [5:02:51<19:35:36, 1.37s/it] 17%|█▋ | 10354/61904 [5:02:53<19:22:17, 1.35s/it] 17%|█▋ | 10355/61904 [5:02:54<19:43:02, 1.38s/it] 17%|█▋ | 10356/61904 [5:02:55<19:25:09, 1.36s/it] 17%|█▋ | 10357/61904 [5:02:57<20:07:29, 1.41s/it] 17%|█▋ | 10358/61904 [5:02:58<20:40:09, 1.44s/it] 17%|█▋ | 10359/61904 [5:03:00<20:11:09, 1.41s/it] 17%|█▋ | 10360/61904 [5:03:01<19:28:36, 1.36s/it] {'loss': 2.7914, 'learning_rate': 1.8353429275249577e-07, 'epoch': 2.68} + 17%|█▋ | 10360/61904 [5:03:01<19:28:36, 1.36s/it] 17%|█▋ | 10361/61904 [5:03:02<19:27:48, 1.36s/it] 17%|█▋ | 10362/61904 [5:03:04<19:45:54, 1.38s/it] 17%|█▋ | 10363/61904 [5:03:05<19:42:30, 1.38s/it] 17%|█▋ | 10364/61904 [5:03:07<19:37:43, 1.37s/it] 17%|█▋ | 10365/61904 [5:03:08<19:32:47, 1.37s/it] 17%|█▋ | 10366/61904 [5:03:09<19:09:32, 1.34s/it] 17%|█▋ | 10367/61904 [5:03:11<19:27:34, 1.36s/it] 17%|█▋ | 10368/61904 [5:03:12<19:14:22, 1.34s/it] 17%|█▋ | 10369/61904 [5:03:13<19:08:44, 1.34s/it] 17%|█▋ | 10370/61904 [5:03:15<19:09:33, 1.34s/it] 17%|█▋ | 10371/61904 [5:03:16<19:07:43, 1.34s/it] 17%|█▋ | 10372/61904 [5:03:17<18:51:57, 1.32s/it] 17%|█▋ | 10373/61904 [5:03:18<18:59:46, 1.33s/it] 17%|█▋ | 10374/61904 [5:03:20<19:04:35, 1.33s/it] 17%|█▋ | 10375/61904 [5:03:21<19:22:40, 1.35s/it] 17%|█▋ | 10376/61904 [5:03:23<19:15:22, 1.35s/it] 17%|█▋ | 10377/61904 [5:03:24<20:29:12, 1.43s/it] 17%|█▋ | 10378/61904 [5:03:26<20:32:09, 1.43s/it] 17%|█▋ | 10379/61904 [5:03:27<20:30:59, 1.43s/it] 17%|█▋ | 10380/61904 [5:03:28<19:36:32, 1.37s/it] {'loss': 2.8569, 'learning_rate': 1.8350187994295346e-07, 'epoch': 2.68} + 17%|█▋ | 10380/61904 [5:03:28<19:36:32, 1.37s/it] 17%|█▋ | 10381/61904 [5:03:30<19:45:54, 1.38s/it] 17%|█▋ | 10382/61904 [5:03:31<19:32:47, 1.37s/it] 17%|█▋ | 10383/61904 [5:03:32<19:36:06, 1.37s/it] 17%|█▋ | 10384/61904 [5:03:34<19:07:46, 1.34s/it] 17%|█▋ | 10385/61904 [5:03:35<19:25:40, 1.36s/it] 17%|█▋ | 10386/61904 [5:03:37<20:10:31, 1.41s/it] 17%|█▋ | 10387/61904 [5:03:38<20:17:26, 1.42s/it] 17%|█▋ | 10388/61904 [5:03:39<19:48:55, 1.38s/it] 17%|█▋ | 10389/61904 [5:03:41<20:22:07, 1.42s/it] 17%|█▋ | 10390/61904 [5:03:42<20:56:42, 1.46s/it] 17%|█▋ | 10391/61904 [5:03:44<20:48:24, 1.45s/it] 17%|█▋ | 10392/61904 [5:03:45<20:31:16, 1.43s/it] 17%|█▋ | 10393/61904 [5:03:47<20:21:50, 1.42s/it] 17%|█▋ | 10394/61904 [5:03:48<20:06:35, 1.41s/it] 17%|█▋ | 10395/61904 [5:03:49<20:00:01, 1.40s/it] 17%|█▋ | 10396/61904 [5:03:51<19:54:35, 1.39s/it] 17%|█▋ | 10397/61904 [5:03:52<20:04:31, 1.40s/it] 17%|█▋ | 10398/61904 [5:03:54<19:53:45, 1.39s/it] 17%|█▋ | 10399/61904 [5:03:55<19:48:30, 1.38s/it] 17%|█▋ | 10400/61904 [5:03:56<20:18:17, 1.42s/it] {'loss': 2.8851, 'learning_rate': 1.8346946713341112e-07, 'epoch': 2.69} + 17%|█▋ | 10400/61904 [5:03:56<20:18:17, 1.42s/it] 17%|█▋ | 10401/61904 [5:03:58<19:32:53, 1.37s/it] 17%|█▋ | 10402/61904 [5:03:59<19:14:17, 1.34s/it] 17%|█▋ | 10403/61904 [5:04:00<19:03:44, 1.33s/it] 17%|█▋ | 10404/61904 [5:04:02<19:14:23, 1.34s/it] 17%|█▋ | 10405/61904 [5:04:03<19:55:55, 1.39s/it] 17%|█▋ | 10406/61904 [5:04:05<20:17:51, 1.42s/it] 17%|█▋ | 10407/61904 [5:04:06<20:13:09, 1.41s/it] 17%|█▋ | 10408/61904 [5:04:07<19:44:03, 1.38s/it] 17%|█▋ | 10409/61904 [5:04:09<19:47:03, 1.38s/it] 17%|█▋ | 10410/61904 [5:04:10<19:51:21, 1.39s/it] 17%|█▋ | 10411/61904 [5:04:11<19:13:22, 1.34s/it] 17%|█▋ | 10412/61904 [5:04:13<19:09:19, 1.34s/it] 17%|█▋ | 10413/61904 [5:04:14<19:43:33, 1.38s/it] 17%|█▋ | 10414/61904 [5:04:15<19:27:11, 1.36s/it] 17%|█▋ | 10415/61904 [5:04:17<20:19:34, 1.42s/it] 17%|█▋ | 10416/61904 [5:04:18<19:39:11, 1.37s/it] 17%|█▋ | 10417/61904 [5:04:20<19:05:24, 1.33s/it] 17%|█▋ | 10418/61904 [5:04:21<18:57:17, 1.33s/it] 17%|█▋ | 10419/61904 [5:04:22<19:06:47, 1.34s/it] 17%|█▋ | 10420/61904 [5:04:24<19:29:15, 1.36s/it] {'loss': 2.8061, 'learning_rate': 1.8343705432386878e-07, 'epoch': 2.69} + 17%|█▋ | 10420/61904 [5:04:24<19:29:15, 1.36s/it] 17%|█▋ | 10421/61904 [5:04:25<18:55:01, 1.32s/it] 17%|█▋ | 10422/61904 [5:04:26<19:12:17, 1.34s/it] 17%|█▋ | 10423/61904 [5:04:28<19:25:13, 1.36s/it] 17%|█▋ | 10424/61904 [5:04:29<19:38:00, 1.37s/it] 17%|█▋ | 10425/61904 [5:04:30<19:30:20, 1.36s/it] 17%|█▋ | 10426/61904 [5:04:32<19:35:44, 1.37s/it] 17%|█▋ | 10427/61904 [5:04:33<19:28:35, 1.36s/it] 17%|█▋ | 10428/61904 [5:04:34<19:13:06, 1.34s/it] 17%|█▋ | 10429/61904 [5:04:36<19:03:58, 1.33s/it] 17%|█▋ | 10430/61904 [5:04:37<19:28:32, 1.36s/it] 17%|█▋ | 10431/61904 [5:04:38<19:18:32, 1.35s/it] 17%|█▋ | 10432/61904 [5:04:40<19:16:42, 1.35s/it] 17%|█▋ | 10433/61904 [5:04:41<19:05:30, 1.34s/it] 17%|█▋ | 10434/61904 [5:04:42<18:59:07, 1.33s/it] 17%|█▋ | 10435/61904 [5:04:44<19:06:23, 1.34s/it] 17%|█▋ | 10436/61904 [5:04:45<19:09:01, 1.34s/it] 17%|█▋ | 10437/61904 [5:04:46<18:34:50, 1.30s/it] 17%|█▋ | 10438/61904 [5:04:48<19:19:15, 1.35s/it] 17%|█▋ | 10439/61904 [5:04:49<19:26:57, 1.36s/it] 17%|█▋ | 10440/61904 [5:04:51<19:50:53, 1.39s/it] {'loss': 2.8493, 'learning_rate': 1.8340464151432647e-07, 'epoch': 2.7} + 17%|█▋ | 10440/61904 [5:04:51<19:50:53, 1.39s/it] 17%|█▋ | 10441/61904 [5:04:52<19:57:55, 1.40s/it] 17%|█▋ | 10442/61904 [5:04:53<19:45:06, 1.38s/it] 17%|█▋ | 10443/61904 [5:04:55<19:51:54, 1.39s/it] 17%|█▋ | 10444/61904 [5:04:56<19:45:24, 1.38s/it] 17%|█▋ | 10445/61904 [5:04:58<19:32:13, 1.37s/it] 17%|█▋ | 10446/61904 [5:04:59<19:14:06, 1.35s/it] 17%|█▋ | 10447/61904 [5:05:00<19:36:01, 1.37s/it] 17%|█▋ | 10448/61904 [5:05:02<19:46:43, 1.38s/it] 17%|█▋ | 10449/61904 [5:05:03<19:20:38, 1.35s/it] 17%|█▋ | 10450/61904 [5:05:04<19:45:13, 1.38s/it] 17%|█▋ | 10451/61904 [5:05:06<20:35:23, 1.44s/it] 17%|█▋ | 10452/61904 [5:05:07<20:15:10, 1.42s/it] 17%|█▋ | 10453/61904 [5:05:09<19:51:42, 1.39s/it] 17%|█▋ | 10454/61904 [5:05:10<19:06:22, 1.34s/it] 17%|█▋ | 10455/61904 [5:05:11<18:44:02, 1.31s/it] 17%|█▋ | 10456/61904 [5:05:13<19:01:23, 1.33s/it] 17%|█▋ | 10457/61904 [5:05:14<19:08:47, 1.34s/it] 17%|█▋ | 10458/61904 [5:05:15<18:44:44, 1.31s/it] 17%|█▋ | 10459/61904 [5:05:16<18:49:05, 1.32s/it] 17%|█▋ | 10460/61904 [5:05:18<19:35:04, 1.37s/it] {'loss': 2.7935, 'learning_rate': 1.8337222870478413e-07, 'epoch': 2.7} + 17%|█▋ | 10460/61904 [5:05:18<19:35:04, 1.37s/it] 17%|█▋ | 10461/61904 [5:05:19<19:39:37, 1.38s/it] 17%|█▋ | 10462/61904 [5:05:21<20:00:24, 1.40s/it] 17%|█▋ | 10463/61904 [5:05:22<20:04:44, 1.41s/it] 17%|█▋ | 10464/61904 [5:05:24<20:08:38, 1.41s/it] 17%|█▋ | 10465/61904 [5:05:25<20:02:59, 1.40s/it] 17%|█▋ | 10466/61904 [5:05:27<21:01:42, 1.47s/it] 17%|█▋ | 10467/61904 [5:05:28<20:50:20, 1.46s/it] 17%|█▋ | 10468/61904 [5:05:29<20:21:10, 1.42s/it] 17%|█▋ | 10469/61904 [5:05:31<20:49:03, 1.46s/it] 17%|█▋ | 10470/61904 [5:05:32<20:40:18, 1.45s/it] 17%|█▋ | 10471/61904 [5:05:34<20:06:58, 1.41s/it] 17%|█▋ | 10472/61904 [5:05:35<19:51:17, 1.39s/it] 17%|█▋ | 10473/61904 [5:05:37<20:12:58, 1.42s/it] 17%|█▋ | 10474/61904 [5:05:38<19:54:05, 1.39s/it] 17%|█▋ | 10475/61904 [5:05:39<19:23:39, 1.36s/it] 17%|█▋ | 10476/61904 [5:05:40<18:58:31, 1.33s/it] 17%|█▋ | 10477/61904 [5:05:42<19:35:38, 1.37s/it] 17%|█▋ | 10478/61904 [5:05:43<19:35:31, 1.37s/it] 17%|█▋ | 10479/61904 [5:05:45<19:59:55, 1.40s/it] 17%|█▋ | 10480/61904 [5:05:46<20:05:38, 1.41s/it] {'loss': 2.8418, 'learning_rate': 1.833398158952418e-07, 'epoch': 2.71} + 17%|█▋ | 10480/61904 [5:05:46<20:05:38, 1.41s/it] 17%|█▋ | 10481/61904 [5:05:48<20:18:53, 1.42s/it] 17%|█▋ | 10482/61904 [5:05:49<20:26:30, 1.43s/it] 17%|█▋ | 10483/61904 [5:05:50<19:38:37, 1.38s/it] 17%|█▋ | 10484/61904 [5:05:52<19:23:41, 1.36s/it] 17%|█▋ | 10485/61904 [5:05:53<19:14:46, 1.35s/it] 17%|█▋ | 10486/61904 [5:05:54<19:50:23, 1.39s/it] 17%|█▋ | 10487/61904 [5:05:56<19:51:15, 1.39s/it] 17%|█▋ | 10488/61904 [5:05:57<19:42:01, 1.38s/it] 17%|█▋ | 10489/61904 [5:05:59<19:35:02, 1.37s/it] 17%|█▋ | 10490/61904 [5:06:00<19:34:38, 1.37s/it] 17%|█▋ | 10491/61904 [5:06:01<19:21:34, 1.36s/it] 17%|█▋ | 10492/61904 [5:06:03<19:15:50, 1.35s/it] 17%|█▋ | 10493/61904 [5:06:04<20:00:19, 1.40s/it] 17%|█▋ | 10494/61904 [5:06:05<19:16:32, 1.35s/it] 17%|█▋ | 10495/61904 [5:06:07<19:51:56, 1.39s/it] 17%|█▋ | 10496/61904 [5:06:08<20:19:12, 1.42s/it] 17%|█▋ | 10497/61904 [5:06:10<20:06:34, 1.41s/it] 17%|█▋ | 10498/61904 [5:06:11<19:56:16, 1.40s/it] 17%|█▋ | 10499/61904 [5:06:12<19:17:47, 1.35s/it] 17%|█▋ | 10500/61904 [5:06:14<19:43:23, 1.38s/it] {'loss': 2.7704, 'learning_rate': 1.8330740308569948e-07, 'epoch': 2.71} + 17%|█▋ | 10500/61904 [5:06:14<19:43:23, 1.38s/it] 17%|█▋ | 10501/61904 [5:06:15<20:20:32, 1.42s/it] 17%|█▋ | 10502/61904 [5:06:17<20:29:14, 1.43s/it] 17%|█▋ | 10503/61904 [5:06:18<21:05:21, 1.48s/it] 17%|█▋ | 10504/61904 [5:06:20<21:07:03, 1.48s/it] 17%|█▋ | 10505/61904 [5:06:21<20:38:40, 1.45s/it] 17%|█▋ | 10506/61904 [5:06:22<20:04:09, 1.41s/it] 17%|█▋ | 10507/61904 [5:06:24<20:04:29, 1.41s/it] 17%|█▋ | 10508/61904 [5:06:25<19:31:26, 1.37s/it] 17%|█▋ | 10509/61904 [5:06:27<20:07:19, 1.41s/it] 17%|█▋ | 10510/61904 [5:06:28<20:05:39, 1.41s/it] 17%|█▋ | 10511/61904 [5:06:29<19:46:30, 1.39s/it] 17%|█▋ | 10512/61904 [5:06:31<20:20:27, 1.42s/it] 17%|█▋ | 10513/61904 [5:06:32<20:05:43, 1.41s/it] 17%|█▋ | 10514/61904 [5:06:34<19:44:58, 1.38s/it] 17%|█▋ | 10515/61904 [5:06:35<20:05:29, 1.41s/it] 17%|█▋ | 10516/61904 [5:06:36<20:12:59, 1.42s/it] 17%|█▋ | 10517/61904 [5:06:38<20:14:54, 1.42s/it] 17%|█▋ | 10518/61904 [5:06:39<19:15:06, 1.35s/it] 17%|█▋ | 10519/61904 [5:06:40<19:01:41, 1.33s/it] 17%|█▋ | 10520/61904 [5:06:42<19:55:05, 1.40s/it] {'loss': 2.87, 'learning_rate': 1.8327499027615711e-07, 'epoch': 2.72} + 17%|█▋ | 10520/61904 [5:06:42<19:55:05, 1.40s/it] 17%|█▋ | 10521/61904 [5:06:43<19:55:33, 1.40s/it] 17%|█▋ | 10522/61904 [5:06:45<20:13:17, 1.42s/it] 17%|█▋ | 10523/61904 [5:06:46<20:12:48, 1.42s/it] 17%|█▋ | 10524/61904 [5:06:47<19:44:04, 1.38s/it] 17%|█▋ | 10525/61904 [5:06:49<19:05:44, 1.34s/it] 17%|█▋ | 10526/61904 [5:06:50<19:18:00, 1.35s/it] 17%|█▋ | 10527/61904 [5:06:51<18:51:37, 1.32s/it] 17%|█▋ | 10528/61904 [5:06:53<19:31:33, 1.37s/it] 17%|█▋ | 10529/61904 [5:06:54<19:11:07, 1.34s/it] 17%|█▋ | 10530/61904 [5:06:56<19:24:05, 1.36s/it] 17%|█▋ | 10531/61904 [5:06:57<20:00:16, 1.40s/it] 17%|█▋ | 10532/61904 [5:06:58<19:41:24, 1.38s/it] 17%|█▋ | 10533/61904 [5:07:00<20:08:50, 1.41s/it] 17%|█▋ | 10534/61904 [5:07:01<19:43:49, 1.38s/it] 17%|█▋ | 10535/61904 [5:07:03<19:44:53, 1.38s/it] 17%|█▋ | 10536/61904 [5:07:04<19:49:09, 1.39s/it] 17%|█▋ | 10537/61904 [5:07:05<19:28:02, 1.36s/it] 17%|█▋ | 10538/61904 [5:07:07<19:27:54, 1.36s/it] 17%|█▋ | 10539/61904 [5:07:08<19:30:35, 1.37s/it] 17%|█▋ | 10540/61904 [5:07:09<19:33:42, 1.37s/it] {'loss': 2.8444, 'learning_rate': 1.832425774666148e-07, 'epoch': 2.72} + 17%|█▋ | 10540/61904 [5:07:09<19:33:42, 1.37s/it] 17%|█▋ | 10541/61904 [5:07:11<19:53:03, 1.39s/it] 17%|█▋ | 10542/61904 [5:07:12<19:24:37, 1.36s/it] 17%|█▋ | 10543/61904 [5:07:13<19:34:11, 1.37s/it] 17%|█▋ | 10544/61904 [5:07:15<19:16:58, 1.35s/it] 17%|█▋ | 10545/61904 [5:07:16<18:54:31, 1.33s/it] 17%|█▋ | 10546/61904 [5:07:17<19:09:42, 1.34s/it] 17%|█▋ | 10547/61904 [5:07:19<18:57:50, 1.33s/it] 17%|█▋ | 10548/61904 [5:07:20<19:11:57, 1.35s/it] 17%|█▋ | 10549/61904 [5:07:21<19:16:42, 1.35s/it] 17%|█▋ | 10550/61904 [5:07:23<18:59:18, 1.33s/it] 17%|█▋ | 10551/61904 [5:07:24<18:58:56, 1.33s/it] 17%|█▋ | 10552/61904 [5:07:25<18:48:39, 1.32s/it] 17%|█▋ | 10553/61904 [5:07:27<19:09:58, 1.34s/it] 17%|█▋ | 10554/61904 [5:07:28<20:15:29, 1.42s/it] 17%|█▋ | 10555/61904 [5:07:30<20:49:34, 1.46s/it] 17%|█▋ | 10556/61904 [5:07:31<20:18:56, 1.42s/it] 17%|█▋ | 10557/61904 [5:07:33<20:36:17, 1.44s/it] 17%|█▋ | 10558/61904 [5:07:34<19:53:39, 1.39s/it] 17%|█▋ | 10559/61904 [5:07:35<19:31:26, 1.37s/it] 17%|█▋ | 10560/61904 [5:07:37<19:55:14, 1.40s/it] {'loss': 2.7719, 'learning_rate': 1.8321016465707246e-07, 'epoch': 2.73} + 17%|█▋ | 10560/61904 [5:07:37<19:55:14, 1.40s/it] 17%|█▋ | 10561/61904 [5:07:38<19:32:57, 1.37s/it] 17%|█▋ | 10562/61904 [5:07:39<18:58:29, 1.33s/it] 17%|█▋ | 10563/61904 [5:07:41<18:53:01, 1.32s/it] 17%|█▋ | 10564/61904 [5:07:42<19:25:02, 1.36s/it] 17%|█▋ | 10565/61904 [5:07:43<19:09:33, 1.34s/it] 17%|█▋ | 10566/61904 [5:07:45<19:24:47, 1.36s/it] 17%|█▋ | 10567/61904 [5:07:46<19:38:51, 1.38s/it] 17%|█▋ | 10568/61904 [5:07:48<20:01:34, 1.40s/it] 17%|█▋ | 10569/61904 [5:07:49<19:56:41, 1.40s/it] 17%|█▋ | 10570/61904 [5:07:50<19:29:47, 1.37s/it] 17%|█▋ | 10571/61904 [5:07:52<19:21:24, 1.36s/it] 17%|█▋ | 10572/61904 [5:07:53<19:39:02, 1.38s/it] 17%|█▋ | 10573/61904 [5:07:54<19:24:45, 1.36s/it] 17%|█▋ | 10574/61904 [5:07:56<19:17:05, 1.35s/it] 17%|█▋ | 10575/61904 [5:07:57<19:01:11, 1.33s/it] 17%|█▋ | 10576/61904 [5:07:58<19:06:38, 1.34s/it] 17%|█▋ | 10577/61904 [5:08:00<19:02:05, 1.34s/it] 17%|█▋ | 10578/61904 [5:08:01<19:22:05, 1.36s/it] 17%|█▋ | 10579/61904 [5:08:02<19:04:39, 1.34s/it] 17%|█▋ | 10580/61904 [5:08:04<19:23:08, 1.36s/it] {'loss': 2.823, 'learning_rate': 1.8317775184753013e-07, 'epoch': 2.73} + 17%|█▋ | 10580/61904 [5:08:04<19:23:08, 1.36s/it] 17%|█▋ | 10581/61904 [5:08:05<19:24:36, 1.36s/it] 17%|█▋ | 10582/61904 [5:08:07<19:18:40, 1.35s/it] 17%|█▋ | 10583/61904 [5:08:08<18:43:43, 1.31s/it] 17%|█▋ | 10584/61904 [5:08:09<19:03:31, 1.34s/it] 17%|█▋ | 10585/61904 [5:08:11<19:26:20, 1.36s/it] 17%|█▋ | 10586/61904 [5:08:12<19:00:11, 1.33s/it] 17%|█▋ | 10587/61904 [5:08:13<19:22:13, 1.36s/it] 17%|█▋ | 10588/61904 [5:08:15<19:50:01, 1.39s/it] 17%|█▋ | 10589/61904 [5:08:16<19:32:53, 1.37s/it] 17%|█▋ | 10590/61904 [5:08:17<19:32:18, 1.37s/it] 17%|█▋ | 10591/61904 [5:08:19<19:10:58, 1.35s/it] 17%|█▋ | 10592/61904 [5:08:20<19:31:03, 1.37s/it] 17%|█▋ | 10593/61904 [5:08:22<19:14:18, 1.35s/it] 17%|█▋ | 10594/61904 [5:08:23<18:59:35, 1.33s/it] 17%|█▋ | 10595/61904 [5:08:24<19:26:13, 1.36s/it] 17%|█▋ | 10596/61904 [5:08:26<19:22:54, 1.36s/it] 17%|█▋ | 10597/61904 [5:08:27<19:27:31, 1.37s/it] 17%|█▋ | 10598/61904 [5:08:28<19:38:33, 1.38s/it] 17%|█▋ | 10599/61904 [5:08:30<19:27:34, 1.37s/it] 17%|█▋ | 10600/61904 [5:08:31<19:20:19, 1.36s/it] {'loss': 2.8562, 'learning_rate': 1.8314533903798782e-07, 'epoch': 2.74} + 17%|█▋ | 10600/61904 [5:08:31<19:20:19, 1.36s/it] 17%|█▋ | 10601/61904 [5:08:32<18:55:48, 1.33s/it] 17%|█▋ | 10602/61904 [5:08:34<18:55:27, 1.33s/it] 17%|█▋ | 10603/61904 [5:08:35<19:06:07, 1.34s/it] 17%|█▋ | 10604/61904 [5:08:36<19:17:26, 1.35s/it] 17%|█▋ | 10605/61904 [5:08:38<18:54:20, 1.33s/it] 17%|█▋ | 10606/61904 [5:08:39<19:36:50, 1.38s/it] 17%|█▋ | 10607/61904 [5:08:40<19:06:25, 1.34s/it] 17%|█▋ | 10608/61904 [5:08:42<18:54:39, 1.33s/it] 17%|█▋ | 10609/61904 [5:08:43<19:12:45, 1.35s/it] 17%|█▋ | 10610/61904 [5:08:44<18:42:43, 1.31s/it] 17%|█▋ | 10611/61904 [5:08:46<18:59:24, 1.33s/it] 17%|█▋ | 10612/61904 [5:08:47<19:10:45, 1.35s/it] 17%|█▋ | 10613/61904 [5:08:48<19:23:54, 1.36s/it] 17%|█▋ | 10614/61904 [5:08:50<18:59:30, 1.33s/it] 17%|█▋ | 10615/61904 [5:08:51<19:24:17, 1.36s/it] 17%|█▋ | 10616/61904 [5:08:53<19:18:02, 1.35s/it] 17%|█▋ | 10617/61904 [5:08:54<18:53:02, 1.33s/it] 17%|█▋ | 10618/61904 [5:08:55<18:44:39, 1.32s/it] 17%|█▋ | 10619/61904 [5:08:56<18:58:22, 1.33s/it] 17%|█▋ | 10620/61904 [5:08:58<18:36:06, 1.31s/it] {'loss': 2.8903, 'learning_rate': 1.8311292622844548e-07, 'epoch': 2.74} + 17%|█▋ | 10620/61904 [5:08:58<18:36:06, 1.31s/it] 17%|█▋ | 10621/61904 [5:08:59<18:51:40, 1.32s/it] 17%|█▋ | 10622/61904 [5:09:00<19:11:49, 1.35s/it] 17%|█▋ | 10623/61904 [5:09:02<19:20:57, 1.36s/it] 17%|█▋ | 10624/61904 [5:09:03<19:40:47, 1.38s/it] 17%|█▋ | 10625/61904 [5:09:05<20:02:12, 1.41s/it] 17%|█▋ | 10626/61904 [5:09:06<19:29:03, 1.37s/it] 17%|█▋ | 10627/61904 [5:09:07<19:24:16, 1.36s/it] 17%|█▋ | 10628/61904 [5:09:09<19:05:00, 1.34s/it] 17%|█▋ | 10629/61904 [5:09:10<19:16:21, 1.35s/it] 17%|█▋ | 10630/61904 [5:09:11<19:20:06, 1.36s/it] 17%|█▋ | 10631/61904 [5:09:13<18:51:21, 1.32s/it] 17%|█▋ | 10632/61904 [5:09:14<19:11:53, 1.35s/it] 17%|█▋ | 10633/61904 [5:09:15<19:04:39, 1.34s/it] 17%|█▋ | 10634/61904 [5:09:17<18:42:51, 1.31s/it] 17%|█▋ | 10635/61904 [5:09:18<20:04:30, 1.41s/it] 17%|█▋ | 10636/61904 [5:09:20<20:13:50, 1.42s/it] 17%|█▋ | 10637/61904 [5:09:21<19:43:13, 1.38s/it] 17%|█▋ | 10638/61904 [5:09:23<20:19:01, 1.43s/it] 17%|█▋ | 10639/61904 [5:09:24<19:57:37, 1.40s/it] 17%|█▋ | 10640/61904 [5:09:25<19:19:12, 1.36s/it] {'loss': 2.8069, 'learning_rate': 1.8308051341890314e-07, 'epoch': 2.75} + 17%|█▋ | 10640/61904 [5:09:25<19:19:12, 1.36s/it] 17%|█▋ | 10641/61904 [5:09:26<19:12:56, 1.35s/it] 17%|█▋ | 10642/61904 [5:09:28<19:29:49, 1.37s/it] 17%|█▋ | 10643/61904 [5:09:29<20:02:09, 1.41s/it] 17%|█▋ | 10644/61904 [5:09:31<19:54:50, 1.40s/it] 17%|█▋ | 10645/61904 [5:09:32<20:45:57, 1.46s/it] 17%|█▋ | 10646/61904 [5:09:34<20:16:25, 1.42s/it] 17%|█▋ | 10647/61904 [5:09:35<19:46:28, 1.39s/it] 17%|█▋ | 10648/61904 [5:09:37<20:19:29, 1.43s/it] 17%|█▋ | 10649/61904 [5:09:38<20:17:05, 1.42s/it] 17%|█▋ | 10650/61904 [5:09:39<20:27:27, 1.44s/it] 17%|█▋ | 10651/61904 [5:09:41<20:06:07, 1.41s/it] 17%|█▋ | 10652/61904 [5:09:42<19:37:14, 1.38s/it] 17%|█▋ | 10653/61904 [5:09:43<19:16:27, 1.35s/it] 17%|█▋ | 10654/61904 [5:09:45<18:46:22, 1.32s/it] 17%|█▋ | 10655/61904 [5:09:46<18:57:07, 1.33s/it] 17%|█▋ | 10656/61904 [5:09:47<18:53:42, 1.33s/it] 17%|█▋ | 10657/61904 [5:09:49<19:13:56, 1.35s/it] 17%|█▋ | 10658/61904 [5:09:50<19:32:06, 1.37s/it] 17%|█▋ | 10659/61904 [5:09:51<19:08:46, 1.35s/it] 17%|█▋ | 10660/61904 [5:09:53<19:05:51, 1.34s/it] {'loss': 2.7696, 'learning_rate': 1.8304810060936083e-07, 'epoch': 2.75} + 17%|█▋ | 10660/61904 [5:09:53<19:05:51, 1.34s/it] 17%|█▋ | 10661/61904 [5:09:54<18:46:39, 1.32s/it] 17%|█▋ | 10662/61904 [5:09:55<18:47:15, 1.32s/it] 17%|█▋ | 10663/61904 [5:09:57<18:49:33, 1.32s/it] 17%|█▋ | 10664/61904 [5:09:58<19:01:08, 1.34s/it] 17%|█▋ | 10665/61904 [5:09:59<19:08:22, 1.34s/it] 17%|█▋ | 10666/61904 [5:10:01<19:03:38, 1.34s/it] 17%|█▋ | 10667/61904 [5:10:02<18:58:15, 1.33s/it] 17%|█▋ | 10668/61904 [5:10:03<19:27:01, 1.37s/it] 17%|█▋ | 10669/61904 [5:10:05<19:14:35, 1.35s/it] 17%|█▋ | 10670/61904 [5:10:06<19:04:13, 1.34s/it] 17%|█▋ | 10671/61904 [5:10:07<19:13:38, 1.35s/it] 17%|█▋ | 10672/61904 [5:10:09<19:24:06, 1.36s/it] 17%|█▋ | 10673/61904 [5:10:10<20:08:33, 1.42s/it] 17%|█▋ | 10674/61904 [5:10:12<19:47:34, 1.39s/it] 17%|█▋ | 10675/61904 [5:10:13<20:24:48, 1.43s/it] 17%|█▋ | 10676/61904 [5:10:15<20:06:22, 1.41s/it] 17%|█▋ | 10677/61904 [5:10:16<19:31:17, 1.37s/it] 17%|█▋ | 10678/61904 [5:10:17<19:47:10, 1.39s/it] 17%|█▋ | 10679/61904 [5:10:19<19:28:00, 1.37s/it] 17%|█▋ | 10680/61904 [5:10:20<19:12:00, 1.35s/it] {'loss': 2.7616, 'learning_rate': 1.830156877998185e-07, 'epoch': 2.76} + 17%|█▋ | 10680/61904 [5:10:20<19:12:00, 1.35s/it] 17%|█▋ | 10681/61904 [5:10:21<19:12:17, 1.35s/it] 17%|█▋ | 10682/61904 [5:10:23<19:11:55, 1.35s/it] 17%|█▋ | 10683/61904 [5:10:24<18:39:16, 1.31s/it] 17%|█▋ | 10684/61904 [5:10:25<19:07:56, 1.34s/it] 17%|█▋ | 10685/61904 [5:10:27<18:39:01, 1.31s/it] 17%|█▋ | 10686/61904 [5:10:28<18:19:33, 1.29s/it] 17%|█▋ | 10687/61904 [5:10:29<18:14:27, 1.28s/it] 17%|█▋ | 10688/61904 [5:10:30<18:29:38, 1.30s/it] 17%|█▋ | 10689/61904 [5:10:32<19:32:39, 1.37s/it] 17%|█▋ | 10690/61904 [5:10:33<19:49:03, 1.39s/it] 17%|█▋ | 10691/61904 [5:10:35<20:01:25, 1.41s/it] 17%|█▋ | 10692/61904 [5:10:36<20:06:51, 1.41s/it] 17%|█▋ | 10693/61904 [5:10:38<20:11:18, 1.42s/it] 17%|█▋ | 10694/61904 [5:10:39<19:55:00, 1.40s/it] 17%|█▋ | 10695/61904 [5:10:40<19:33:52, 1.38s/it] 17%|█▋ | 10696/61904 [5:10:42<19:27:46, 1.37s/it] 17%|█▋ | 10697/61904 [5:10:43<18:54:13, 1.33s/it] 17%|█▋ | 10698/61904 [5:10:44<18:23:12, 1.29s/it] 17%|█▋ | 10699/61904 [5:10:46<20:11:05, 1.42s/it] 17%|█▋ | 10700/61904 [5:10:47<20:02:42, 1.41s/it] {'loss': 2.7976, 'learning_rate': 1.8298327499027615e-07, 'epoch': 2.77} + 17%|█▋ | 10700/61904 [5:10:47<20:02:42, 1.41s/it] 17%|█▋ | 10701/61904 [5:10:49<19:35:00, 1.38s/it] 17%|█▋ | 10702/61904 [5:10:50<19:20:21, 1.36s/it] 17%|█▋ | 10703/61904 [5:10:51<19:11:33, 1.35s/it] 17%|█▋ | 10704/61904 [5:10:52<18:52:49, 1.33s/it] 17%|█▋ | 10705/61904 [5:10:54<18:42:38, 1.32s/it] 17%|█▋ | 10706/61904 [5:10:55<19:06:42, 1.34s/it] 17%|█▋ | 10707/61904 [5:10:57<19:30:39, 1.37s/it] 17%|█▋ | 10708/61904 [5:10:58<19:57:49, 1.40s/it] 17%|█▋ | 10709/61904 [5:10:59<19:49:36, 1.39s/it] 17%|█▋ | 10710/61904 [5:11:01<19:34:32, 1.38s/it] 17%|█▋ | 10711/61904 [5:11:02<20:16:47, 1.43s/it] 17%|█▋ | 10712/61904 [5:11:04<19:45:51, 1.39s/it] 17%|█▋ | 10713/61904 [5:11:05<19:41:09, 1.38s/it] 17%|█▋ | 10714/61904 [5:11:06<19:40:21, 1.38s/it] 17%|█▋ | 10715/61904 [5:11:08<19:32:24, 1.37s/it] 17%|█▋ | 10716/61904 [5:11:09<19:24:51, 1.37s/it] 17%|█▋ | 10717/61904 [5:11:10<19:24:55, 1.37s/it] 17%|█▋ | 10718/61904 [5:11:12<18:59:46, 1.34s/it] 17%|█▋ | 10719/61904 [5:11:13<18:52:40, 1.33s/it] 17%|█▋ | 10720/61904 [5:11:14<19:04:12, 1.34s/it] {'loss': 2.8024, 'learning_rate': 1.829508621807338e-07, 'epoch': 2.77} + 17%|█▋ | 10720/61904 [5:11:14<19:04:12, 1.34s/it] 17%|█▋ | 10721/61904 [5:11:16<18:53:00, 1.33s/it] 17%|█▋ | 10722/61904 [5:11:17<19:01:44, 1.34s/it] 17%|█▋ | 10723/61904 [5:11:18<19:31:29, 1.37s/it] 17%|█▋ | 10724/61904 [5:11:20<19:00:19, 1.34s/it] 17%|█▋ | 10725/61904 [5:11:21<18:51:12, 1.33s/it] 17%|█▋ | 10726/61904 [5:11:22<18:50:41, 1.33s/it] 17%|█▋ | 10727/61904 [5:11:24<19:06:52, 1.34s/it] 17%|█▋ | 10728/61904 [5:11:25<18:50:19, 1.33s/it] 17%|█▋ | 10729/61904 [5:11:27<19:39:35, 1.38s/it] 17%|█▋ | 10730/61904 [5:11:28<20:00:53, 1.41s/it] 17%|█▋ | 10731/61904 [5:11:30<20:25:22, 1.44s/it] 17%|█▋ | 10732/61904 [5:11:31<20:04:08, 1.41s/it] 17%|█▋ | 10733/61904 [5:11:32<19:50:06, 1.40s/it] 17%|█▋ | 10734/61904 [5:11:34<19:37:54, 1.38s/it] 17%|█▋ | 10735/61904 [5:11:35<20:17:28, 1.43s/it] 17%|█▋ | 10736/61904 [5:11:36<20:02:13, 1.41s/it] 17%|█▋ | 10737/61904 [5:11:38<19:58:12, 1.41s/it] 17%|█▋ | 10738/61904 [5:11:39<19:37:48, 1.38s/it] 17%|█▋ | 10739/61904 [5:11:41<19:56:54, 1.40s/it] 17%|█▋ | 10740/61904 [5:11:42<19:48:27, 1.39s/it] {'loss': 2.8195, 'learning_rate': 1.8291844937119147e-07, 'epoch': 2.78} + 17%|█▋ | 10740/61904 [5:11:42<19:48:27, 1.39s/it] 17%|█▋ | 10741/61904 [5:11:43<19:23:42, 1.36s/it] 17%|█▋ | 10742/61904 [5:11:45<19:35:48, 1.38s/it] 17%|█▋ | 10743/61904 [5:11:46<20:29:57, 1.44s/it] 17%|█▋ | 10744/61904 [5:11:48<19:25:10, 1.37s/it] 17%|█▋ | 10745/61904 [5:11:49<20:12:32, 1.42s/it] 17%|█▋ | 10746/61904 [5:11:50<19:55:27, 1.40s/it] 17%|█▋ | 10747/61904 [5:11:52<19:28:52, 1.37s/it] 17%|█▋ | 10748/61904 [5:11:53<19:41:54, 1.39s/it] 17%|█▋ | 10749/61904 [5:11:55<19:42:19, 1.39s/it] 17%|█▋ | 10750/61904 [5:11:56<19:35:12, 1.38s/it] 17%|█▋ | 10751/61904 [5:11:57<19:23:01, 1.36s/it] 17%|█▋ | 10752/61904 [5:11:59<19:24:07, 1.37s/it] 17%|█▋ | 10753/61904 [5:12:00<19:17:52, 1.36s/it] 17%|█▋ | 10754/61904 [5:12:01<19:14:17, 1.35s/it] 17%|█▋ | 10755/61904 [5:12:02<18:37:30, 1.31s/it] 17%|█▋ | 10756/61904 [5:12:04<18:27:46, 1.30s/it] 17%|█▋ | 10757/61904 [5:12:05<18:32:21, 1.30s/it] 17%|█▋ | 10758/61904 [5:12:07<19:11:31, 1.35s/it] 17%|█▋ | 10759/61904 [5:12:08<21:14:15, 1.49s/it] 17%|█▋ | 10760/61904 [5:12:10<20:36:16, 1.45s/it] {'loss': 2.8792, 'learning_rate': 1.8288603656164916e-07, 'epoch': 2.78} + 17%|█▋ | 10760/61904 [5:12:10<20:36:16, 1.45s/it] 17%|█▋ | 10761/61904 [5:12:11<20:23:12, 1.44s/it] 17%|█▋ | 10762/61904 [5:12:13<20:25:19, 1.44s/it] 17%|█▋ | 10763/61904 [5:12:14<20:47:39, 1.46s/it] 17%|█▋ | 10764/61904 [5:12:15<19:56:21, 1.40s/it] 17%|█▋ | 10765/61904 [5:12:17<20:03:46, 1.41s/it] 17%|█▋ | 10766/61904 [5:12:18<19:55:55, 1.40s/it] 17%|█▋ | 10767/61904 [5:12:20<19:49:53, 1.40s/it] 17%|█▋ | 10768/61904 [5:12:21<19:13:37, 1.35s/it] 17%|█▋ | 10769/61904 [5:12:22<18:50:17, 1.33s/it] 17%|█▋ | 10770/61904 [5:12:23<18:24:01, 1.30s/it] 17%|█▋ | 10771/61904 [5:12:25<19:08:09, 1.35s/it] 17%|█▋ | 10772/61904 [5:12:26<19:01:29, 1.34s/it] 17%|█▋ | 10773/61904 [5:12:27<18:49:06, 1.32s/it] 17%|█▋ | 10774/61904 [5:12:29<18:49:58, 1.33s/it] 17%|█▋ | 10775/61904 [5:12:30<19:09:30, 1.35s/it] 17%|█▋ | 10776/61904 [5:12:32<19:36:14, 1.38s/it] 17%|█▋ | 10777/61904 [5:12:33<19:34:54, 1.38s/it] 17%|█▋ | 10778/61904 [5:12:34<19:09:34, 1.35s/it] 17%|█▋ | 10779/61904 [5:12:36<19:03:50, 1.34s/it] 17%|█▋ | 10780/61904 [5:12:37<19:28:10, 1.37s/it] {'loss': 2.8563, 'learning_rate': 1.8285362375210682e-07, 'epoch': 2.79} + 17%|█▋ | 10780/61904 [5:12:37<19:28:10, 1.37s/it] 17%|█▋ | 10781/61904 [5:12:38<19:23:59, 1.37s/it] 17%|█▋ | 10782/61904 [5:12:40<18:56:41, 1.33s/it] 17%|█▋ | 10783/61904 [5:12:41<18:38:45, 1.31s/it] 17%|█▋ | 10784/61904 [5:12:42<18:43:37, 1.32s/it] 17%|█▋ | 10785/61904 [5:12:44<19:20:51, 1.36s/it] 17%|█▋ | 10786/61904 [5:12:45<18:58:05, 1.34s/it] 17%|█▋ | 10787/61904 [5:12:46<18:51:11, 1.33s/it] 17%|█▋ | 10788/61904 [5:12:48<19:01:23, 1.34s/it] 17%|█▋ | 10789/61904 [5:12:49<19:23:16, 1.37s/it] 17%|█▋ | 10790/61904 [5:12:50<19:28:27, 1.37s/it] 17%|█▋ | 10791/61904 [5:12:52<19:13:31, 1.35s/it] 17%|█▋ | 10792/61904 [5:12:53<19:29:49, 1.37s/it] 17%|█▋ | 10793/61904 [5:12:54<19:16:53, 1.36s/it] 17%|█▋ | 10794/61904 [5:12:56<18:41:58, 1.32s/it] 17%|█▋ | 10795/61904 [5:12:57<18:37:55, 1.31s/it] 17%|█▋ | 10796/61904 [5:12:58<18:34:14, 1.31s/it] 17%|█▋ | 10797/61904 [5:13:00<18:55:52, 1.33s/it] 17%|█▋ | 10798/61904 [5:13:01<18:49:55, 1.33s/it] 17%|█▋ | 10799/61904 [5:13:02<18:25:36, 1.30s/it] 17%|█▋ | 10800/61904 [5:13:04<19:25:24, 1.37s/it] {'loss': 2.7792, 'learning_rate': 1.8282121094256449e-07, 'epoch': 2.79} + 17%|█▋ | 10800/61904 [5:13:04<19:25:24, 1.37s/it] 17%|█▋ | 10801/61904 [5:13:05<19:32:40, 1.38s/it] 17%|█▋ | 10802/61904 [5:13:06<19:17:40, 1.36s/it] 17%|█▋ | 10803/61904 [5:13:08<19:15:53, 1.36s/it] 17%|█▋ | 10804/61904 [5:13:09<19:25:36, 1.37s/it] 17%|█▋ | 10805/61904 [5:13:11<19:18:50, 1.36s/it] 17%|█▋ | 10806/61904 [5:13:12<19:11:15, 1.35s/it] 17%|█▋ | 10807/61904 [5:13:13<19:54:31, 1.40s/it] 17%|█▋ | 10808/61904 [5:13:15<19:21:44, 1.36s/it] 17%|█▋ | 10809/61904 [5:13:16<19:43:51, 1.39s/it] 17%|█▋ | 10810/61904 [5:13:18<19:46:02, 1.39s/it] 17%|█▋ | 10811/61904 [5:13:19<19:42:09, 1.39s/it] 17%|█▋ | 10812/61904 [5:13:20<19:29:26, 1.37s/it] 17%|█▋ | 10813/61904 [5:13:21<18:57:26, 1.34s/it] 17%|█▋ | 10814/61904 [5:13:23<19:00:12, 1.34s/it] 17%|█▋ | 10815/61904 [5:13:24<18:41:39, 1.32s/it] 17%|█▋ | 10816/61904 [5:13:25<18:31:37, 1.31s/it] 17%|█▋ | 10817/61904 [5:13:27<18:48:27, 1.33s/it] 17%|█▋ | 10818/61904 [5:13:28<19:04:32, 1.34s/it] 17%|█▋ | 10819/61904 [5:13:29<18:37:09, 1.31s/it] 17%|█▋ | 10820/61904 [5:13:31<19:25:24, 1.37s/it] {'loss': 2.8382, 'learning_rate': 1.8278879813302218e-07, 'epoch': 2.8} + 17%|█▋ | 10820/61904 [5:13:31<19:25:24, 1.37s/it] 17%|█▋ | 10821/61904 [5:13:32<19:26:58, 1.37s/it] 17%|█▋ | 10822/61904 [5:13:34<19:03:10, 1.34s/it] 17%|█▋ | 10823/61904 [5:13:35<18:48:09, 1.33s/it] 17%|█▋ | 10824/61904 [5:13:36<19:21:25, 1.36s/it] 17%|█▋ | 10825/61904 [5:13:38<19:40:49, 1.39s/it] 17%|█▋ | 10826/61904 [5:13:39<19:15:51, 1.36s/it] 17%|█▋ | 10827/61904 [5:13:40<19:25:33, 1.37s/it] 17%|█▋ | 10828/61904 [5:13:42<19:25:07, 1.37s/it] 17%|█▋ | 10829/61904 [5:13:43<20:01:16, 1.41s/it] 17%|█▋ | 10830/61904 [5:13:45<19:27:43, 1.37s/it] 17%|█▋ | 10831/61904 [5:13:46<19:46:43, 1.39s/it] 17%|█▋ | 10832/61904 [5:13:47<19:39:13, 1.39s/it] 17%|█▋ | 10833/61904 [5:13:49<20:14:31, 1.43s/it] 18%|█▊ | 10834/61904 [5:13:50<19:33:49, 1.38s/it] 18%|█▊ | 10835/61904 [5:13:51<19:03:47, 1.34s/it] 18%|█▊ | 10836/61904 [5:13:53<18:49:05, 1.33s/it] 18%|█▊ | 10837/61904 [5:13:54<19:27:44, 1.37s/it] 18%|█▊ | 10838/61904 [5:13:56<19:16:46, 1.36s/it] 18%|█▊ | 10839/61904 [5:13:57<18:59:52, 1.34s/it] 18%|█▊ | 10840/61904 [5:13:58<18:53:10, 1.33s/it] {'loss': 2.7697, 'learning_rate': 1.8275638532347984e-07, 'epoch': 2.8} + 18%|█▊ | 10840/61904 [5:13:58<18:53:10, 1.33s/it] 18%|█▊ | 10841/61904 [5:13:59<19:04:34, 1.34s/it] 18%|█▊ | 10842/61904 [5:14:01<19:01:44, 1.34s/it] 18%|█▊ | 10843/61904 [5:14:02<18:33:39, 1.31s/it] 18%|█▊ | 10844/61904 [5:14:03<19:01:28, 1.34s/it] 18%|█▊ | 10845/61904 [5:14:05<18:47:06, 1.32s/it] 18%|█▊ | 10846/61904 [5:14:06<18:30:02, 1.30s/it] 18%|█▊ | 10847/61904 [5:14:07<19:00:58, 1.34s/it] 18%|█▊ | 10848/61904 [5:14:09<18:58:18, 1.34s/it] 18%|█▊ | 10849/61904 [5:14:10<18:49:34, 1.33s/it] 18%|█▊ | 10850/61904 [5:14:11<18:53:30, 1.33s/it] 18%|█▊ | 10851/61904 [5:14:13<19:33:05, 1.38s/it] 18%|█▊ | 10852/61904 [5:14:14<19:09:31, 1.35s/it] 18%|█▊ | 10853/61904 [5:14:16<19:47:03, 1.40s/it] 18%|█▊ | 10854/61904 [5:14:17<19:30:42, 1.38s/it] 18%|█▊ | 10855/61904 [5:14:18<19:03:07, 1.34s/it] 18%|█▊ | 10856/61904 [5:14:20<18:51:06, 1.33s/it] 18%|█▊ | 10857/61904 [5:14:21<18:39:12, 1.32s/it] 18%|█▊ | 10858/61904 [5:14:22<18:17:45, 1.29s/it] 18%|█▊ | 10859/61904 [5:14:23<18:24:56, 1.30s/it] 18%|█▊ | 10860/61904 [5:14:25<18:55:16, 1.33s/it] {'loss': 2.8915, 'learning_rate': 1.827239725139375e-07, 'epoch': 2.81} + 18%|█▊ | 10860/61904 [5:14:25<18:55:16, 1.33s/it] 18%|█▊ | 10861/61904 [5:14:26<18:55:23, 1.33s/it] 18%|█▊ | 10862/61904 [5:14:28<19:13:21, 1.36s/it] 18%|█▊ | 10863/61904 [5:14:29<19:22:36, 1.37s/it] 18%|█▊ | 10864/61904 [5:14:30<19:56:19, 1.41s/it] 18%|█▊ | 10865/61904 [5:14:32<19:54:45, 1.40s/it] 18%|█▊ | 10866/61904 [5:14:33<19:27:46, 1.37s/it] 18%|█▊ | 10867/61904 [5:14:34<19:10:30, 1.35s/it] 18%|█▊ | 10868/61904 [5:14:36<19:08:36, 1.35s/it] 18%|█▊ | 10869/61904 [5:14:37<19:23:04, 1.37s/it] 18%|█▊ | 10870/61904 [5:14:39<19:16:26, 1.36s/it] 18%|█▊ | 10871/61904 [5:14:40<19:04:26, 1.35s/it] 18%|█▊ | 10872/61904 [5:14:41<19:54:54, 1.40s/it] 18%|█▊ | 10873/61904 [5:14:43<20:28:39, 1.44s/it] 18%|█▊ | 10874/61904 [5:14:44<20:21:29, 1.44s/it] 18%|█▊ | 10875/61904 [5:14:46<19:55:51, 1.41s/it] 18%|█▊ | 10876/61904 [5:14:47<19:50:36, 1.40s/it] 18%|█▊ | 10877/61904 [5:14:48<19:38:32, 1.39s/it] 18%|█▊ | 10878/61904 [5:14:50<19:42:42, 1.39s/it] 18%|█▊ | 10879/61904 [5:14:51<19:24:29, 1.37s/it] 18%|█▊ | 10880/61904 [5:14:53<19:24:04, 1.37s/it] {'loss': 2.8202, 'learning_rate': 1.826915597043952e-07, 'epoch': 2.81} + 18%|█▊ | 10880/61904 [5:14:53<19:24:04, 1.37s/it] 18%|█▊ | 10881/61904 [5:14:54<20:05:20, 1.42s/it] 18%|█▊ | 10882/61904 [5:14:55<20:04:08, 1.42s/it] 18%|█▊ | 10883/61904 [5:14:57<19:52:27, 1.40s/it] 18%|█▊ | 10884/61904 [5:14:58<19:24:03, 1.37s/it] 18%|█▊ | 10885/61904 [5:15:00<19:20:31, 1.36s/it] 18%|█▊ | 10886/61904 [5:15:01<19:05:56, 1.35s/it] 18%|█▊ | 10887/61904 [5:15:02<19:18:20, 1.36s/it] 18%|█▊ | 10888/61904 [5:15:04<19:13:22, 1.36s/it] 18%|█▊ | 10889/61904 [5:15:05<19:02:56, 1.34s/it] 18%|█▊ | 10890/61904 [5:15:06<19:06:56, 1.35s/it] 18%|█▊ | 10891/61904 [5:15:08<20:24:33, 1.44s/it] 18%|█▊ | 10892/61904 [5:15:09<20:04:48, 1.42s/it] 18%|█▊ | 10893/61904 [5:15:11<19:28:48, 1.37s/it] 18%|█▊ | 10894/61904 [5:15:12<19:20:17, 1.36s/it] 18%|█▊ | 10895/61904 [5:15:13<20:02:26, 1.41s/it] 18%|█▊ | 10896/61904 [5:15:15<19:19:18, 1.36s/it] 18%|█▊ | 10897/61904 [5:15:16<19:26:01, 1.37s/it] 18%|█▊ | 10898/61904 [5:15:17<19:27:40, 1.37s/it] 18%|█▊ | 10899/61904 [5:15:19<19:24:16, 1.37s/it] 18%|█▊ | 10900/61904 [5:15:20<19:40:06, 1.39s/it] {'loss': 2.7699, 'learning_rate': 1.8265914689485282e-07, 'epoch': 2.82} + 18%|█▊ | 10900/61904 [5:15:20<19:40:06, 1.39s/it] 18%|█▊ | 10901/61904 [5:15:22<19:41:09, 1.39s/it] 18%|█▊ | 10902/61904 [5:15:23<20:08:39, 1.42s/it] 18%|█▊ | 10903/61904 [5:15:24<20:03:51, 1.42s/it] 18%|█▊ | 10904/61904 [5:15:26<19:47:07, 1.40s/it] 18%|█▊ | 10905/61904 [5:15:27<19:44:00, 1.39s/it] 18%|█▊ | 10906/61904 [5:15:29<19:37:59, 1.39s/it] 18%|█▊ | 10907/61904 [5:15:30<19:56:59, 1.41s/it] 18%|█▊ | 10908/61904 [5:15:31<19:42:43, 1.39s/it] 18%|█▊ | 10909/61904 [5:15:33<20:06:42, 1.42s/it] 18%|█▊ | 10910/61904 [5:15:35<20:56:08, 1.48s/it] 18%|█▊ | 10911/61904 [5:15:36<20:06:25, 1.42s/it] 18%|█▊ | 10912/61904 [5:15:37<19:53:57, 1.40s/it] 18%|█▊ | 10913/61904 [5:15:39<19:59:34, 1.41s/it] 18%|█▊ | 10914/61904 [5:15:40<19:27:34, 1.37s/it] 18%|█▊ | 10915/61904 [5:15:41<18:59:32, 1.34s/it] 18%|█▊ | 10916/61904 [5:15:43<19:11:52, 1.36s/it] 18%|█▊ | 10917/61904 [5:15:44<19:44:48, 1.39s/it] 18%|█▊ | 10918/61904 [5:15:45<19:15:20, 1.36s/it] 18%|█▊ | 10919/61904 [5:15:47<19:25:37, 1.37s/it] 18%|█▊ | 10920/61904 [5:15:48<19:07:06, 1.35s/it] {'loss': 2.8795, 'learning_rate': 1.826267340853105e-07, 'epoch': 2.82} + 18%|█▊ | 10920/61904 [5:15:48<19:07:06, 1.35s/it] 18%|█▊ | 10921/61904 [5:15:49<18:58:03, 1.34s/it] 18%|█▊ | 10922/61904 [5:15:51<19:26:16, 1.37s/it] 18%|█▊ | 10923/61904 [5:15:52<19:30:32, 1.38s/it] 18%|█▊ | 10924/61904 [5:15:54<19:51:07, 1.40s/it] 18%|█▊ | 10925/61904 [5:15:55<20:17:13, 1.43s/it] 18%|█▊ | 10926/61904 [5:15:57<20:24:35, 1.44s/it] 18%|█▊ | 10927/61904 [5:15:58<20:59:56, 1.48s/it] 18%|█▊ | 10928/61904 [5:15:59<20:20:39, 1.44s/it] 18%|█▊ | 10929/61904 [5:16:01<20:26:27, 1.44s/it] 18%|█▊ | 10930/61904 [5:16:02<19:49:01, 1.40s/it] 18%|█▊ | 10931/61904 [5:16:04<19:52:04, 1.40s/it] 18%|█▊ | 10932/61904 [5:16:05<20:20:57, 1.44s/it] 18%|█▊ | 10933/61904 [5:16:07<19:58:49, 1.41s/it] 18%|█▊ | 10934/61904 [5:16:08<19:42:26, 1.39s/it] 18%|█▊ | 10935/61904 [5:16:09<19:45:20, 1.40s/it] 18%|█▊ | 10936/61904 [5:16:11<19:19:08, 1.36s/it] 18%|█▊ | 10937/61904 [5:16:12<19:12:57, 1.36s/it] 18%|█▊ | 10938/61904 [5:16:13<18:41:01, 1.32s/it] 18%|█▊ | 10939/61904 [5:16:15<18:58:36, 1.34s/it] 18%|█▊ | 10940/61904 [5:16:16<18:59:50, 1.34s/it] {'loss': 2.7777, 'learning_rate': 1.8259432127576817e-07, 'epoch': 2.83} + 18%|█▊ | 10940/61904 [5:16:16<18:59:50, 1.34s/it] 18%|█▊ | 10941/61904 [5:16:17<18:58:57, 1.34s/it] 18%|█▊ | 10942/61904 [5:16:18<18:43:01, 1.32s/it] 18%|█▊ | 10943/61904 [5:16:20<18:45:21, 1.32s/it] 18%|█▊ | 10944/61904 [5:16:21<18:36:26, 1.31s/it] 18%|█▊ | 10945/61904 [5:16:22<18:55:17, 1.34s/it] 18%|█▊ | 10946/61904 [5:16:24<19:55:18, 1.41s/it] 18%|█▊ | 10947/61904 [5:16:25<19:35:24, 1.38s/it] 18%|█▊ | 10948/61904 [5:16:27<19:43:09, 1.39s/it] 18%|█▊ | 10949/61904 [5:16:28<19:25:31, 1.37s/it] 18%|█▊ | 10950/61904 [5:16:29<19:14:23, 1.36s/it] 18%|█▊ | 10951/61904 [5:16:31<19:58:38, 1.41s/it] 18%|█▊ | 10952/61904 [5:16:32<20:10:17, 1.43s/it] 18%|█▊ | 10953/61904 [5:16:34<19:39:05, 1.39s/it] 18%|█▊ | 10954/61904 [5:16:35<19:00:51, 1.34s/it] 18%|█▊ | 10955/61904 [5:16:36<19:11:47, 1.36s/it] 18%|█▊ | 10956/61904 [5:16:38<19:12:47, 1.36s/it] 18%|█▊ | 10957/61904 [5:16:39<19:15:31, 1.36s/it] 18%|█▊ | 10958/61904 [5:16:40<19:09:35, 1.35s/it] 18%|█▊ | 10959/61904 [5:16:42<18:41:06, 1.32s/it] 18%|█▊ | 10960/61904 [5:16:43<19:10:35, 1.36s/it] {'loss': 2.8015, 'learning_rate': 1.8256190846622583e-07, 'epoch': 2.83} + 18%|█▊ | 10960/61904 [5:16:43<19:10:35, 1.36s/it] 18%|█▊ | 10961/61904 [5:16:45<19:59:54, 1.41s/it] 18%|█▊ | 10962/61904 [5:16:46<20:18:39, 1.44s/it] 18%|█▊ | 10963/61904 [5:16:47<19:55:21, 1.41s/it] 18%|█▊ | 10964/61904 [5:16:49<20:01:33, 1.42s/it] 18%|█▊ | 10965/61904 [5:16:50<19:24:31, 1.37s/it] 18%|█▊ | 10966/61904 [5:16:52<19:27:38, 1.38s/it] 18%|█▊ | 10967/61904 [5:16:53<19:43:26, 1.39s/it] 18%|█▊ | 10968/61904 [5:16:54<19:09:25, 1.35s/it] 18%|█▊ | 10969/61904 [5:16:56<19:36:05, 1.39s/it] 18%|█▊ | 10970/61904 [5:16:57<19:13:49, 1.36s/it] 18%|█▊ | 10971/61904 [5:16:58<19:20:17, 1.37s/it] 18%|█▊ | 10972/61904 [5:17:00<19:13:08, 1.36s/it] 18%|█▊ | 10973/61904 [5:17:01<19:35:08, 1.38s/it] 18%|█▊ | 10974/61904 [5:17:02<19:01:15, 1.34s/it] 18%|█▊ | 10975/61904 [5:17:04<18:43:13, 1.32s/it] 18%|█▊ | 10976/61904 [5:17:05<19:18:14, 1.36s/it] 18%|█▊ | 10977/61904 [5:17:07<19:35:23, 1.38s/it] 18%|█▊ | 10978/61904 [5:17:08<20:10:17, 1.43s/it] 18%|█▊ | 10979/61904 [5:17:10<20:16:20, 1.43s/it] 18%|█▊ | 10980/61904 [5:17:11<19:15:51, 1.36s/it] {'loss': 2.8306, 'learning_rate': 1.8252949565668352e-07, 'epoch': 2.84} + 18%|█▊ | 10980/61904 [5:17:11<19:15:51, 1.36s/it] 18%|█▊ | 10981/61904 [5:17:12<19:07:51, 1.35s/it] 18%|█▊ | 10982/61904 [5:17:13<19:02:23, 1.35s/it] 18%|█▊ | 10983/61904 [5:17:15<19:37:03, 1.39s/it] 18%|█▊ | 10984/61904 [5:17:16<19:26:33, 1.37s/it] 18%|█▊ | 10985/61904 [5:17:18<18:56:11, 1.34s/it] 18%|█▊ | 10986/61904 [5:17:19<19:11:00, 1.36s/it] 18%|█▊ | 10987/61904 [5:17:20<18:44:10, 1.32s/it] 18%|█▊ | 10988/61904 [5:17:22<18:51:11, 1.33s/it] 18%|█▊ | 10989/61904 [5:17:23<19:00:19, 1.34s/it] 18%|█▊ | 10990/61904 [5:17:24<18:47:41, 1.33s/it] 18%|█▊ | 10991/61904 [5:17:26<19:28:04, 1.38s/it] 18%|█▊ | 10992/61904 [5:17:27<19:57:00, 1.41s/it] 18%|█▊ | 10993/61904 [5:17:28<19:21:53, 1.37s/it] 18%|█▊ | 10994/61904 [5:17:30<19:05:21, 1.35s/it] 18%|█▊ | 10995/61904 [5:17:31<19:19:39, 1.37s/it] 18%|█▊ | 10996/61904 [5:17:32<18:52:49, 1.34s/it] 18%|█▊ | 10997/61904 [5:17:34<18:30:27, 1.31s/it] 18%|█▊ | 10998/61904 [5:17:35<18:34:43, 1.31s/it] 18%|█▊ | 10999/61904 [5:17:36<18:26:25, 1.30s/it] 18%|█▊ | 11000/61904 [5:17:38<18:55:49, 1.34s/it] {'loss': 2.986, 'learning_rate': 1.8249708284714118e-07, 'epoch': 2.84} + 18%|█▊ | 11000/61904 [5:17:38<18:55:49, 1.34s/it] 18%|█▊ | 11001/61904 [5:17:39<18:56:41, 1.34s/it] 18%|█▊ | 11002/61904 [5:17:40<19:01:44, 1.35s/it] 18%|█▊ | 11003/61904 [5:17:42<18:46:15, 1.33s/it] 18%|█▊ | 11004/61904 [5:17:43<18:42:54, 1.32s/it] 18%|█▊ | 11005/61904 [5:17:44<19:04:48, 1.35s/it] 18%|█▊ | 11006/61904 [5:17:46<19:36:27, 1.39s/it] 18%|█▊ | 11007/61904 [5:17:47<19:26:10, 1.37s/it] 18%|█▊ | 11008/61904 [5:17:48<18:50:00, 1.33s/it] 18%|█▊ | 11009/61904 [5:17:50<19:47:30, 1.40s/it] 18%|█▊ | 11010/61904 [5:17:51<19:25:56, 1.37s/it] 18%|█▊ | 11011/61904 [5:17:53<19:35:57, 1.39s/it] 18%|█▊ | 11012/61904 [5:17:54<19:08:29, 1.35s/it] 18%|█▊ | 11013/61904 [5:17:56<19:42:45, 1.39s/it] 18%|█▊ | 11014/61904 [5:17:57<20:08:48, 1.43s/it] 18%|█▊ | 11015/61904 [5:17:59<20:26:49, 1.45s/it] 18%|█▊ | 11016/61904 [5:18:00<21:06:34, 1.49s/it] 18%|█▊ | 11017/61904 [5:18:01<20:32:57, 1.45s/it] 18%|█▊ | 11018/61904 [5:18:03<19:43:10, 1.40s/it] 18%|█▊ | 11019/61904 [5:18:04<19:37:12, 1.39s/it] 18%|█▊ | 11020/61904 [5:18:06<20:01:22, 1.42s/it] {'loss': 2.785, 'learning_rate': 1.8246467003759885e-07, 'epoch': 2.85} + 18%|█▊ | 11020/61904 [5:18:06<20:01:22, 1.42s/it] 18%|█▊ | 11021/61904 [5:18:07<19:32:16, 1.38s/it] 18%|█▊ | 11022/61904 [5:18:08<18:53:35, 1.34s/it] 18%|█▊ | 11023/61904 [5:18:09<19:00:04, 1.34s/it] 18%|█▊ | 11024/61904 [5:18:11<18:57:23, 1.34s/it] 18%|█▊ | 11025/61904 [5:18:12<18:41:41, 1.32s/it] 18%|█▊ | 11026/61904 [5:18:14<19:19:00, 1.37s/it] 18%|█▊ | 11027/61904 [5:18:15<18:53:45, 1.34s/it] 18%|█▊ | 11028/61904 [5:18:16<18:40:41, 1.32s/it] 18%|█▊ | 11029/61904 [5:18:17<18:26:29, 1.30s/it] 18%|█▊ | 11030/61904 [5:18:19<19:06:12, 1.35s/it] 18%|█▊ | 11031/61904 [5:18:20<18:37:55, 1.32s/it] 18%|█▊ | 11032/61904 [5:18:21<18:49:31, 1.33s/it] 18%|█▊ | 11033/61904 [5:18:23<19:19:29, 1.37s/it] 18%|█▊ | 11034/61904 [5:18:24<19:13:08, 1.36s/it] 18%|█▊ | 11035/61904 [5:18:26<18:51:27, 1.33s/it] 18%|█▊ | 11036/61904 [5:18:27<19:06:02, 1.35s/it] 18%|█▊ | 11037/61904 [5:18:28<19:17:37, 1.37s/it] 18%|█▊ | 11038/61904 [5:18:30<19:40:42, 1.39s/it] 18%|█▊ | 11039/61904 [5:18:31<19:03:23, 1.35s/it] 18%|█▊ | 11040/61904 [5:18:32<19:01:47, 1.35s/it] {'loss': 2.7796, 'learning_rate': 1.8243225722805653e-07, 'epoch': 2.85} + 18%|█▊ | 11040/61904 [5:18:32<19:01:47, 1.35s/it] 18%|█▊ | 11041/61904 [5:18:34<19:15:39, 1.36s/it] 18%|█▊ | 11042/61904 [5:18:35<19:31:57, 1.38s/it] 18%|█▊ | 11043/61904 [5:18:37<19:29:37, 1.38s/it] 18%|█▊ | 11044/61904 [5:18:38<19:13:57, 1.36s/it] 18%|█▊ | 11045/61904 [5:18:39<19:21:36, 1.37s/it] 18%|█▊ | 11046/61904 [5:18:41<18:58:42, 1.34s/it] 18%|█▊ | 11047/61904 [5:18:42<18:30:53, 1.31s/it] 18%|█▊ | 11048/61904 [5:18:43<19:30:15, 1.38s/it] 18%|█▊ | 11049/61904 [5:18:45<19:57:56, 1.41s/it] 18%|█▊ | 11050/61904 [5:18:46<19:50:46, 1.40s/it] 18%|█▊ | 11051/61904 [5:18:48<20:21:42, 1.44s/it] 18%|█▊ | 11052/61904 [5:18:49<20:13:02, 1.43s/it] 18%|█▊ | 11053/61904 [5:18:50<19:43:04, 1.40s/it] 18%|█▊ | 11054/61904 [5:18:52<19:09:59, 1.36s/it] 18%|█▊ | 11055/61904 [5:18:53<19:20:16, 1.37s/it] 18%|█▊ | 11056/61904 [5:18:55<19:54:27, 1.41s/it] 18%|█▊ | 11057/61904 [5:18:56<19:56:48, 1.41s/it] 18%|█▊ | 11058/61904 [5:18:57<19:36:42, 1.39s/it] 18%|█▊ | 11059/61904 [5:18:59<19:43:14, 1.40s/it] 18%|█▊ | 11060/61904 [5:19:00<20:02:14, 1.42s/it] {'loss': 2.7851, 'learning_rate': 1.823998444185142e-07, 'epoch': 2.86} + 18%|█▊ | 11060/61904 [5:19:00<20:02:14, 1.42s/it] 18%|█▊ | 11061/61904 [5:19:02<19:59:21, 1.42s/it] 18%|█▊ | 11062/61904 [5:19:03<19:44:09, 1.40s/it] 18%|█▊ | 11063/61904 [5:19:04<19:22:13, 1.37s/it] 18%|█▊ | 11064/61904 [5:19:06<19:20:57, 1.37s/it] 18%|█▊ | 11065/61904 [5:19:07<18:53:15, 1.34s/it] 18%|█▊ | 11066/61904 [5:19:08<19:39:21, 1.39s/it] 18%|█▊ | 11067/61904 [5:19:10<20:07:56, 1.43s/it] 18%|█▊ | 11068/61904 [5:19:11<19:15:15, 1.36s/it] 18%|█▊ | 11069/61904 [5:19:13<19:10:51, 1.36s/it] 18%|█▊ | 11070/61904 [5:19:14<19:00:22, 1.35s/it] 18%|█▊ | 11071/61904 [5:19:15<19:41:17, 1.39s/it] 18%|█▊ | 11072/61904 [5:19:17<19:26:30, 1.38s/it] 18%|█▊ | 11073/61904 [5:19:18<19:39:12, 1.39s/it] 18%|█▊ | 11074/61904 [5:19:19<19:12:46, 1.36s/it] 18%|█▊ | 11075/61904 [5:19:21<19:19:34, 1.37s/it] 18%|█▊ | 11076/61904 [5:19:22<18:46:21, 1.33s/it] 18%|█▊ | 11077/61904 [5:19:23<19:00:18, 1.35s/it] 18%|█▊ | 11078/61904 [5:19:25<18:41:02, 1.32s/it] 18%|█▊ | 11079/61904 [5:19:26<18:58:41, 1.34s/it] 18%|█▊ | 11080/61904 [5:19:27<18:49:05, 1.33s/it] {'loss': 2.8133, 'learning_rate': 1.8236743160897186e-07, 'epoch': 2.86} + 18%|█▊ | 11080/61904 [5:19:27<18:49:05, 1.33s/it] 18%|█▊ | 11081/61904 [5:19:29<18:58:45, 1.34s/it] 18%|█▊ | 11082/61904 [5:19:30<18:49:45, 1.33s/it] 18%|█▊ | 11083/61904 [5:19:31<19:00:39, 1.35s/it] 18%|█▊ | 11084/61904 [5:19:33<19:24:07, 1.37s/it] 18%|█▊ | 11085/61904 [5:19:34<19:23:24, 1.37s/it] 18%|█▊ | 11086/61904 [5:19:36<18:56:50, 1.34s/it] 18%|█▊ | 11087/61904 [5:19:37<18:39:49, 1.32s/it] 18%|█▊ | 11088/61904 [5:19:38<18:52:00, 1.34s/it] 18%|█▊ | 11089/61904 [5:19:40<19:15:44, 1.36s/it] 18%|█▊ | 11090/61904 [5:19:41<18:56:16, 1.34s/it] 18%|█▊ | 11091/61904 [5:19:42<18:55:30, 1.34s/it] 18%|█▊ | 11092/61904 [5:19:44<19:12:30, 1.36s/it] 18%|█▊ | 11093/61904 [5:19:45<20:41:16, 1.47s/it] 18%|█▊ | 11094/61904 [5:19:47<20:13:49, 1.43s/it] 18%|█▊ | 11095/61904 [5:19:48<19:54:42, 1.41s/it] 18%|█▊ | 11096/61904 [5:19:49<19:51:15, 1.41s/it] 18%|█▊ | 11097/61904 [5:19:51<19:31:33, 1.38s/it] 18%|█▊ | 11098/61904 [5:19:52<19:33:52, 1.39s/it] 18%|█▊ | 11099/61904 [5:19:54<19:46:19, 1.40s/it] 18%|█▊ | 11100/61904 [5:19:55<20:08:16, 1.43s/it] {'loss': 2.7767, 'learning_rate': 1.8233501879942955e-07, 'epoch': 2.87} + 18%|█▊ | 11100/61904 [5:19:55<20:08:16, 1.43s/it] 18%|█▊ | 11101/61904 [5:19:56<19:31:04, 1.38s/it] 18%|█▊ | 11102/61904 [5:19:58<19:48:29, 1.40s/it] 18%|█▊ | 11103/61904 [5:19:59<19:40:32, 1.39s/it] 18%|█▊ | 11104/61904 [5:20:01<19:23:46, 1.37s/it] 18%|█▊ | 11105/61904 [5:20:02<19:10:11, 1.36s/it] 18%|█▊ | 11106/61904 [5:20:03<19:04:58, 1.35s/it] 18%|█▊ | 11107/61904 [5:20:05<19:23:00, 1.37s/it] 18%|█▊ | 11108/61904 [5:20:06<19:05:58, 1.35s/it] 18%|█▊ | 11109/61904 [5:20:07<18:44:51, 1.33s/it] 18%|█▊ | 11110/61904 [5:20:09<19:19:34, 1.37s/it] 18%|█▊ | 11111/61904 [5:20:10<18:48:41, 1.33s/it] 18%|█▊ | 11112/61904 [5:20:11<18:54:28, 1.34s/it] 18%|█▊ | 11113/61904 [5:20:13<18:46:44, 1.33s/it] 18%|█▊ | 11114/61904 [5:20:14<18:24:41, 1.31s/it] 18%|█▊ | 11115/61904 [5:20:15<18:48:28, 1.33s/it] 18%|█▊ | 11116/61904 [5:20:17<18:52:35, 1.34s/it] 18%|█▊ | 11117/61904 [5:20:18<18:39:15, 1.32s/it] 18%|█▊ | 11118/61904 [5:20:19<18:29:08, 1.31s/it] 18%|█▊ | 11119/61904 [5:20:20<18:29:12, 1.31s/it] 18%|█▊ | 11120/61904 [5:20:22<18:29:09, 1.31s/it] {'loss': 2.8456, 'learning_rate': 1.8230260598988718e-07, 'epoch': 2.87} + 18%|█▊ | 11120/61904 [5:20:22<18:29:09, 1.31s/it] 18%|█▊ | 11121/61904 [5:20:23<19:05:54, 1.35s/it] 18%|█▊ | 11122/61904 [5:20:24<18:23:27, 1.30s/it] 18%|█▊ | 11123/61904 [5:20:26<19:01:44, 1.35s/it] 18%|█▊ | 11124/61904 [5:20:27<19:04:04, 1.35s/it] 18%|█▊ | 11125/61904 [5:20:29<19:04:22, 1.35s/it] 18%|█▊ | 11126/61904 [5:20:30<19:03:39, 1.35s/it] 18%|█▊ | 11127/61904 [5:20:31<18:40:22, 1.32s/it] 18%|█▊ | 11128/61904 [5:20:32<18:09:04, 1.29s/it] 18%|█▊ | 11129/61904 [5:20:34<18:24:33, 1.31s/it] 18%|█▊ | 11130/61904 [5:20:35<19:19:52, 1.37s/it] 18%|█▊ | 11131/61904 [5:20:37<19:06:47, 1.36s/it] 18%|█▊ | 11132/61904 [5:20:38<18:53:37, 1.34s/it] 18%|█▊ | 11133/61904 [5:20:39<19:41:22, 1.40s/it] 18%|█▊ | 11134/61904 [5:20:41<19:57:45, 1.42s/it] 18%|█▊ | 11135/61904 [5:20:42<19:16:07, 1.37s/it] 18%|█▊ | 11136/61904 [5:20:43<18:48:36, 1.33s/it] 18%|█▊ | 11137/61904 [5:20:45<18:36:47, 1.32s/it] 18%|█▊ | 11138/61904 [5:20:46<18:36:55, 1.32s/it] 18%|█▊ | 11139/61904 [5:20:47<18:39:36, 1.32s/it] 18%|█▊ | 11140/61904 [5:20:49<18:26:15, 1.31s/it] {'loss': 2.7697, 'learning_rate': 1.8227019318034487e-07, 'epoch': 2.88} + 18%|█▊ | 11140/61904 [5:20:49<18:26:15, 1.31s/it] 18%|█▊ | 11141/61904 [5:20:50<19:25:09, 1.38s/it] 18%|█▊ | 11142/61904 [5:20:52<19:34:08, 1.39s/it] 18%|█▊ | 11143/61904 [5:20:53<19:17:57, 1.37s/it] 18%|█▊ | 11144/61904 [5:20:54<18:46:29, 1.33s/it] 18%|█▊ | 11145/61904 [5:20:55<18:21:27, 1.30s/it] 18%|█▊ | 11146/61904 [5:20:57<18:31:19, 1.31s/it] 18%|█▊ | 11147/61904 [5:20:58<18:49:09, 1.33s/it] 18%|█▊ | 11148/61904 [5:20:59<19:08:14, 1.36s/it] 18%|█▊ | 11149/61904 [5:21:01<19:17:40, 1.37s/it] 18%|█▊ | 11150/61904 [5:21:02<19:28:44, 1.38s/it] 18%|█▊ | 11151/61904 [5:21:04<19:11:29, 1.36s/it] 18%|█▊ | 11152/61904 [5:21:05<19:38:37, 1.39s/it] 18%|█▊ | 11153/61904 [5:21:06<19:41:05, 1.40s/it] 18%|█▊ | 11154/61904 [5:21:08<20:19:57, 1.44s/it] 18%|█▊ | 11155/61904 [5:21:09<19:33:50, 1.39s/it] 18%|█▊ | 11156/61904 [5:21:11<19:20:47, 1.37s/it] 18%|█▊ | 11157/61904 [5:21:12<19:23:53, 1.38s/it] 18%|█▊ | 11158/61904 [5:21:13<19:30:08, 1.38s/it] 18%|█▊ | 11159/61904 [5:21:15<19:16:55, 1.37s/it] 18%|█▊ | 11160/61904 [5:21:16<19:19:14, 1.37s/it] {'loss': 2.7501, 'learning_rate': 1.8223778037080253e-07, 'epoch': 2.88} + 18%|█▊ | 11160/61904 [5:21:16<19:19:14, 1.37s/it] 18%|█▊ | 11161/61904 [5:21:17<19:18:25, 1.37s/it] 18%|█▊ | 11162/61904 [5:21:19<19:37:57, 1.39s/it] 18%|█▊ | 11163/61904 [5:21:20<19:06:16, 1.36s/it] 18%|█▊ | 11164/61904 [5:21:22<19:12:44, 1.36s/it] 18%|█▊ | 11165/61904 [5:21:23<19:05:06, 1.35s/it] 18%|█▊ | 11166/61904 [5:21:24<19:08:18, 1.36s/it] 18%|█▊ | 11167/61904 [5:21:26<19:32:12, 1.39s/it] 18%|█▊ | 11168/61904 [5:21:27<19:19:00, 1.37s/it] 18%|█▊ | 11169/61904 [5:21:28<19:31:29, 1.39s/it] 18%|█▊ | 11170/61904 [5:21:30<21:25:30, 1.52s/it] 18%|█▊ | 11171/61904 [5:21:32<20:28:12, 1.45s/it] 18%|█▊ | 11172/61904 [5:21:33<20:20:04, 1.44s/it] 18%|█▊ | 11173/61904 [5:21:34<20:10:23, 1.43s/it] 18%|█▊ | 11174/61904 [5:21:36<19:50:51, 1.41s/it] 18%|█▊ | 11175/61904 [5:21:37<19:37:16, 1.39s/it] 18%|█▊ | 11176/61904 [5:21:39<20:13:25, 1.44s/it] 18%|█▊ | 11177/61904 [5:21:40<19:59:25, 1.42s/it] 18%|█▊ | 11178/61904 [5:21:41<19:39:48, 1.40s/it] 18%|█▊ | 11179/61904 [5:21:43<19:15:18, 1.37s/it] 18%|█▊ | 11180/61904 [5:21:44<18:46:21, 1.33s/it] {'loss': 2.8102, 'learning_rate': 1.822053675612602e-07, 'epoch': 2.89} + 18%|█▊ | 11180/61904 [5:21:44<18:46:21, 1.33s/it] 18%|█▊ | 11181/61904 [5:21:46<19:42:14, 1.40s/it] 18%|█▊ | 11182/61904 [5:21:47<19:32:35, 1.39s/it] 18%|█▊ | 11183/61904 [5:21:48<18:49:44, 1.34s/it] 18%|█▊ | 11184/61904 [5:21:50<19:21:37, 1.37s/it] 18%|█▊ | 11185/61904 [5:21:51<19:49:09, 1.41s/it] 18%|█▊ | 11186/61904 [5:21:52<19:17:47, 1.37s/it] 18%|█▊ | 11187/61904 [5:21:54<19:11:10, 1.36s/it] 18%|█▊ | 11188/61904 [5:21:55<19:30:16, 1.38s/it] 18%|█▊ | 11189/61904 [5:21:56<19:28:02, 1.38s/it] 18%|█▊ | 11190/61904 [5:21:58<19:38:43, 1.39s/it] 18%|█▊ | 11191/61904 [5:21:59<19:16:42, 1.37s/it] 18%|█▊ | 11192/61904 [5:22:01<19:18:25, 1.37s/it] 18%|█▊ | 11193/61904 [5:22:02<19:54:45, 1.41s/it] 18%|█▊ | 11194/61904 [5:22:03<19:49:13, 1.41s/it] 18%|█▊ | 11195/61904 [5:22:05<19:52:50, 1.41s/it] 18%|█▊ | 11196/61904 [5:22:06<20:31:53, 1.46s/it] 18%|█▊ | 11197/61904 [5:22:08<20:00:47, 1.42s/it] 18%|█▊ | 11198/61904 [5:22:09<20:38:16, 1.47s/it] 18%|█▊ | 11199/61904 [5:22:11<20:24:09, 1.45s/it] 18%|█▊ | 11200/61904 [5:22:12<20:23:54, 1.45s/it] {'loss': 2.8303, 'learning_rate': 1.8217295475171788e-07, 'epoch': 2.89} + 18%|█▊ | 11200/61904 [5:22:12<20:23:54, 1.45s/it] 18%|█▊ | 11201/61904 [5:22:13<19:32:00, 1.39s/it] 18%|█▊ | 11202/61904 [5:22:15<19:04:07, 1.35s/it] 18%|█▊ | 11203/61904 [5:22:16<18:32:00, 1.32s/it] 18%|█▊ | 11204/61904 [5:22:17<18:48:08, 1.34s/it] 18%|█▊ | 11205/61904 [5:22:19<18:48:58, 1.34s/it] 18%|█▊ | 11206/61904 [5:22:20<19:45:21, 1.40s/it] 18%|█▊ | 11207/61904 [5:22:22<20:10:23, 1.43s/it] 18%|█▊ | 11208/61904 [5:22:23<20:15:07, 1.44s/it] 18%|█▊ | 11209/61904 [5:22:24<19:31:23, 1.39s/it] 18%|█▊ | 11210/61904 [5:22:26<19:30:28, 1.39s/it] 18%|█▊ | 11211/61904 [5:22:27<19:11:05, 1.36s/it] 18%|█▊ | 11212/61904 [5:22:29<19:25:28, 1.38s/it] 18%|█▊ | 11213/61904 [5:22:30<19:45:09, 1.40s/it] 18%|█▊ | 11214/61904 [5:22:31<19:06:06, 1.36s/it] 18%|█▊ | 11215/61904 [5:22:33<19:42:04, 1.40s/it] 18%|█▊ | 11216/61904 [5:22:34<19:17:59, 1.37s/it] 18%|█▊ | 11217/61904 [5:22:36<20:14:53, 1.44s/it] 18%|█▊ | 11218/61904 [5:22:37<19:39:42, 1.40s/it] 18%|█▊ | 11219/61904 [5:22:38<19:14:13, 1.37s/it] 18%|█▊ | 11220/61904 [5:22:40<19:19:09, 1.37s/it] {'loss': 2.8061, 'learning_rate': 1.8214054194217554e-07, 'epoch': 2.9} + 18%|█▊ | 11220/61904 [5:22:40<19:19:09, 1.37s/it] 18%|█▊ | 11221/61904 [5:22:41<19:44:58, 1.40s/it] 18%|█▊ | 11222/61904 [5:22:42<19:30:08, 1.39s/it] 18%|█▊ | 11223/61904 [5:22:44<20:08:53, 1.43s/it] 18%|█▊ | 11224/61904 [5:22:45<19:43:19, 1.40s/it] 18%|█▊ | 11225/61904 [5:22:47<19:49:40, 1.41s/it] 18%|█▊ | 11226/61904 [5:22:48<19:55:31, 1.42s/it] 18%|█▊ | 11227/61904 [5:22:50<19:39:30, 1.40s/it] 18%|█▊ | 11228/61904 [5:22:51<19:15:58, 1.37s/it] 18%|█▊ | 11229/61904 [5:22:52<19:25:21, 1.38s/it] 18%|█▊ | 11230/61904 [5:22:54<20:11:20, 1.43s/it] 18%|█▊ | 11231/61904 [5:22:55<19:18:27, 1.37s/it] 18%|█▊ | 11232/61904 [5:22:56<18:46:06, 1.33s/it] 18%|█▊ | 11233/61904 [5:22:58<18:31:34, 1.32s/it] 18%|█▊ | 11234/61904 [5:22:59<18:33:06, 1.32s/it] 18%|█▊ | 11235/61904 [5:23:00<18:46:26, 1.33s/it] 18%|█▊ | 11236/61904 [5:23:02<19:19:47, 1.37s/it] 18%|█▊ | 11237/61904 [5:23:03<19:08:10, 1.36s/it] 18%|█▊ | 11238/61904 [5:23:05<19:35:09, 1.39s/it] 18%|█▊ | 11239/61904 [5:23:06<19:23:37, 1.38s/it] 18%|█▊ | 11240/61904 [5:23:07<18:49:20, 1.34s/it] {'loss': 2.853, 'learning_rate': 1.821081291326332e-07, 'epoch': 2.9} + 18%|█▊ | 11240/61904 [5:23:07<18:49:20, 1.34s/it] 18%|█▊ | 11241/61904 [5:23:08<18:26:21, 1.31s/it] 18%|█▊ | 11242/61904 [5:23:10<18:22:07, 1.31s/it] 18%|█▊ | 11243/61904 [5:23:11<19:26:31, 1.38s/it] 18%|█▊ | 11244/61904 [5:23:13<19:30:10, 1.39s/it] 18%|█▊ | 11245/61904 [5:23:14<18:50:01, 1.34s/it] 18%|█▊ | 11246/61904 [5:23:15<19:25:16, 1.38s/it] 18%|█▊ | 11247/61904 [5:23:17<19:27:15, 1.38s/it] 18%|█▊ | 11248/61904 [5:23:18<19:38:31, 1.40s/it] 18%|█▊ | 11249/61904 [5:23:20<19:50:21, 1.41s/it] 18%|█▊ | 11250/61904 [5:23:21<19:40:50, 1.40s/it] 18%|█▊ | 11251/61904 [5:23:23<20:37:09, 1.47s/it] 18%|█▊ | 11252/61904 [5:23:24<21:04:32, 1.50s/it] 18%|█▊ | 11253/61904 [5:23:26<21:08:10, 1.50s/it] 18%|█▊ | 11254/61904 [5:23:27<20:19:45, 1.44s/it] 18%|█▊ | 11255/61904 [5:23:28<20:18:11, 1.44s/it] 18%|█▊ | 11256/61904 [5:23:30<19:17:59, 1.37s/it] 18%|█▊ | 11257/61904 [5:23:31<18:46:09, 1.33s/it] 18%|█▊ | 11258/61904 [5:23:32<18:42:46, 1.33s/it] 18%|█▊ | 11259/61904 [5:23:33<18:37:24, 1.32s/it] 18%|█▊ | 11260/61904 [5:23:35<18:54:13, 1.34s/it] {'loss': 2.8419, 'learning_rate': 1.820757163230909e-07, 'epoch': 2.91} + 18%|█▊ | 11260/61904 [5:23:35<18:54:13, 1.34s/it] 18%|█▊ | 11261/61904 [5:23:36<19:11:53, 1.36s/it] 18%|█▊ | 11262/61904 [5:23:38<19:11:39, 1.36s/it] 18%|█▊ | 11263/61904 [5:23:39<19:12:10, 1.37s/it] 18%|█▊ | 11264/61904 [5:23:41<19:47:00, 1.41s/it] 18%|█▊ | 11265/61904 [5:23:42<19:21:26, 1.38s/it] 18%|█▊ | 11266/61904 [5:23:43<19:35:45, 1.39s/it] 18%|█▊ | 11267/61904 [5:23:45<19:35:47, 1.39s/it] 18%|█▊ | 11268/61904 [5:23:46<19:12:17, 1.37s/it] 18%|█▊ | 11269/61904 [5:23:47<18:39:40, 1.33s/it] 18%|█▊ | 11270/61904 [5:23:49<18:45:58, 1.33s/it] 18%|█▊ | 11271/61904 [5:23:50<19:23:57, 1.38s/it] 18%|█▊ | 11272/61904 [5:23:52<20:06:24, 1.43s/it] 18%|█▊ | 11273/61904 [5:23:53<20:38:29, 1.47s/it] 18%|█▊ | 11274/61904 [5:23:55<20:43:45, 1.47s/it] 18%|█▊ | 11275/61904 [5:23:56<20:14:52, 1.44s/it] 18%|█▊ | 11276/61904 [5:23:57<20:10:16, 1.43s/it] 18%|█▊ | 11277/61904 [5:23:59<19:37:28, 1.40s/it] 18%|█▊ | 11278/61904 [5:24:00<19:20:58, 1.38s/it] 18%|█▊ | 11279/61904 [5:24:02<19:44:29, 1.40s/it] 18%|█▊ | 11280/61904 [5:24:03<19:37:04, 1.40s/it] {'loss': 2.8818, 'learning_rate': 1.8204330351354856e-07, 'epoch': 2.92} + 18%|█▊ | 11280/61904 [5:24:03<19:37:04, 1.40s/it] 18%|█▊ | 11281/61904 [5:24:04<19:40:14, 1.40s/it] 18%|█▊ | 11282/61904 [5:24:06<19:35:44, 1.39s/it] 18%|█▊ | 11283/61904 [5:24:07<19:50:53, 1.41s/it] 18%|█▊ | 11284/61904 [5:24:08<19:08:37, 1.36s/it] 18%|█▊ | 11285/61904 [5:24:10<19:56:38, 1.42s/it] 18%|█▊ | 11286/61904 [5:24:11<19:40:47, 1.40s/it] 18%|█▊ | 11287/61904 [5:24:13<19:17:38, 1.37s/it] 18%|█▊ | 11288/61904 [5:24:14<19:39:51, 1.40s/it] 18%|█▊ | 11289/61904 [5:24:16<19:58:07, 1.42s/it] 18%|█▊ | 11290/61904 [5:24:17<19:35:59, 1.39s/it] 18%|█▊ | 11291/61904 [5:24:18<20:27:31, 1.46s/it] 18%|█▊ | 11292/61904 [5:24:20<20:39:44, 1.47s/it] 18%|█▊ | 11293/61904 [5:24:21<20:34:59, 1.46s/it] 18%|█▊ | 11294/61904 [5:24:23<20:25:03, 1.45s/it] 18%|█▊ | 11295/61904 [5:24:24<20:07:28, 1.43s/it] 18%|█▊ | 11296/61904 [5:24:26<19:58:26, 1.42s/it] 18%|█▊ | 11297/61904 [5:24:27<20:18:22, 1.44s/it] 18%|█▊ | 11298/61904 [5:24:29<20:04:41, 1.43s/it] 18%|█▊ | 11299/61904 [5:24:30<19:35:01, 1.39s/it] 18%|█▊ | 11300/61904 [5:24:31<19:54:03, 1.42s/it] {'loss': 2.7992, 'learning_rate': 1.8201089070400622e-07, 'epoch': 2.92} + 18%|█▊ | 11300/61904 [5:24:31<19:54:03, 1.42s/it] 18%|█▊ | 11301/61904 [5:24:33<19:39:54, 1.40s/it] 18%|█▊ | 11302/61904 [5:24:34<19:32:52, 1.39s/it] 18%|█▊ | 11303/61904 [5:24:35<19:49:47, 1.41s/it] 18%|█▊ | 11304/61904 [5:24:37<19:46:16, 1.41s/it] 18%|█▊ | 11305/61904 [5:24:38<19:38:53, 1.40s/it] 18%|█▊ | 11306/61904 [5:24:40<20:35:09, 1.46s/it] 18%|█▊ | 11307/61904 [5:24:41<20:13:12, 1.44s/it] 18%|█▊ | 11308/61904 [5:24:43<20:03:59, 1.43s/it] 18%|█▊ | 11309/61904 [5:24:44<19:30:13, 1.39s/it] 18%|█▊ | 11310/61904 [5:24:45<20:00:58, 1.42s/it] 18%|█▊ | 11311/61904 [5:24:47<19:55:41, 1.42s/it] 18%|█▊ | 11312/61904 [5:24:48<19:32:43, 1.39s/it] 18%|█▊ | 11313/61904 [5:24:50<19:16:49, 1.37s/it] 18%|█▊ | 11314/61904 [5:24:51<19:10:40, 1.36s/it] 18%|█▊ | 11315/61904 [5:24:52<19:29:52, 1.39s/it] 18%|█▊ | 11316/61904 [5:24:54<19:40:09, 1.40s/it] 18%|█▊ | 11317/61904 [5:24:55<19:21:51, 1.38s/it] 18%|█▊ | 11318/61904 [5:24:56<19:35:36, 1.39s/it] 18%|█▊ | 11319/61904 [5:24:58<19:43:53, 1.40s/it] 18%|█▊ | 11320/61904 [5:24:59<19:30:26, 1.39s/it] {'loss': 2.8553, 'learning_rate': 1.8197847789446388e-07, 'epoch': 2.93} + 18%|█▊ | 11320/61904 [5:24:59<19:30:26, 1.39s/it] 18%|█▊ | 11321/61904 [5:25:01<19:22:06, 1.38s/it] 18%|█▊ | 11322/61904 [5:25:02<19:32:07, 1.39s/it] 18%|█▊ | 11323/61904 [5:25:04<19:57:15, 1.42s/it] 18%|█▊ | 11324/61904 [5:25:05<19:49:26, 1.41s/it] 18%|█▊ | 11325/61904 [5:25:06<20:06:24, 1.43s/it] 18%|█▊ | 11326/61904 [5:25:08<19:12:38, 1.37s/it] 18%|█▊ | 11327/61904 [5:25:09<19:26:51, 1.38s/it] 18%|█▊ | 11328/61904 [5:25:10<19:20:05, 1.38s/it] 18%|█▊ | 11329/61904 [5:25:12<19:25:51, 1.38s/it] 18%|█▊ | 11330/61904 [5:25:13<19:44:44, 1.41s/it] 18%|█▊ | 11331/61904 [5:25:15<19:05:06, 1.36s/it] 18%|█▊ | 11332/61904 [5:25:16<19:31:06, 1.39s/it] 18%|█▊ | 11333/61904 [5:25:17<19:19:38, 1.38s/it] 18%|█▊ | 11334/61904 [5:25:19<19:15:43, 1.37s/it] 18%|█▊ | 11335/61904 [5:25:20<18:55:34, 1.35s/it] 18%|█▊ | 11336/61904 [5:25:21<19:23:16, 1.38s/it] 18%|█▊ | 11337/61904 [5:25:23<20:06:48, 1.43s/it] 18%|█▊ | 11338/61904 [5:25:24<19:51:15, 1.41s/it] 18%|█▊ | 11339/61904 [5:25:26<20:10:05, 1.44s/it] 18%|█▊ | 11340/61904 [5:25:27<19:48:47, 1.41s/it] {'loss': 2.7901, 'learning_rate': 1.8194606508492154e-07, 'epoch': 2.93} + 18%|█▊ | 11340/61904 [5:25:27<19:48:47, 1.41s/it] 18%|█▊ | 11341/61904 [5:25:29<19:32:06, 1.39s/it] 18%|█▊ | 11342/61904 [5:25:30<19:12:02, 1.37s/it] 18%|█▊ | 11343/61904 [5:25:31<19:03:05, 1.36s/it] 18%|█▊ | 11344/61904 [5:25:32<18:44:05, 1.33s/it] 18%|█▊ | 11345/61904 [5:25:34<18:46:14, 1.34s/it] 18%|█▊ | 11346/61904 [5:25:35<19:16:53, 1.37s/it] 18%|█▊ | 11347/61904 [5:25:37<19:03:11, 1.36s/it] 18%|█▊ | 11348/61904 [5:25:38<19:07:12, 1.36s/it] 18%|█▊ | 11349/61904 [5:25:39<18:46:47, 1.34s/it] 18%|█▊ | 11350/61904 [5:25:40<18:20:58, 1.31s/it] 18%|█▊ | 11351/61904 [5:25:42<18:17:26, 1.30s/it] 18%|█▊ | 11352/61904 [5:25:43<18:47:35, 1.34s/it] 18%|█▊ | 11353/61904 [5:25:45<19:06:26, 1.36s/it] 18%|█▊ | 11354/61904 [5:25:46<18:57:48, 1.35s/it] 18%|█▊ | 11355/61904 [5:25:47<19:03:18, 1.36s/it] 18%|█▊ | 11356/61904 [5:25:49<18:53:19, 1.35s/it] 18%|█▊ | 11357/61904 [5:25:50<19:19:59, 1.38s/it] 18%|█▊ | 11358/61904 [5:25:52<20:02:15, 1.43s/it] 18%|█▊ | 11359/61904 [5:25:53<19:46:59, 1.41s/it] 18%|█▊ | 11360/61904 [5:25:54<20:04:14, 1.43s/it] {'loss': 2.7795, 'learning_rate': 1.8191365227537923e-07, 'epoch': 2.94} + 18%|█▊ | 11360/61904 [5:25:54<20:04:14, 1.43s/it] 18%|█▊ | 11361/61904 [5:25:56<19:59:28, 1.42s/it] 18%|█▊ | 11362/61904 [5:25:57<20:00:39, 1.43s/it] 18%|█▊ | 11363/61904 [5:25:59<20:35:31, 1.47s/it] 18%|█▊ | 11364/61904 [5:26:00<19:38:06, 1.40s/it] 18%|█▊ | 11365/61904 [5:26:01<19:26:09, 1.38s/it] 18%|█▊ | 11366/61904 [5:26:03<19:35:32, 1.40s/it] 18%|█▊ | 11367/61904 [5:26:04<19:07:27, 1.36s/it] 18%|█▊ | 11368/61904 [5:26:06<19:25:45, 1.38s/it] 18%|█▊ | 11369/61904 [5:26:07<19:55:58, 1.42s/it] 18%|█▊ | 11370/61904 [5:26:09<20:22:29, 1.45s/it] 18%|█▊ | 11371/61904 [5:26:10<19:57:58, 1.42s/it] 18%|█▊ | 11372/61904 [5:26:11<19:48:00, 1.41s/it] 18%|█▊ | 11373/61904 [5:26:13<19:33:44, 1.39s/it] 18%|█▊ | 11374/61904 [5:26:14<19:35:45, 1.40s/it] 18%|█▊ | 11375/61904 [5:26:15<19:16:45, 1.37s/it] 18%|█▊ | 11376/61904 [5:26:17<19:31:38, 1.39s/it] 18%|█▊ | 11377/61904 [5:26:18<19:19:04, 1.38s/it] 18%|█▊ | 11378/61904 [5:26:20<19:39:33, 1.40s/it] 18%|█▊ | 11379/61904 [5:26:21<19:46:31, 1.41s/it] 18%|█▊ | 11380/61904 [5:26:22<19:44:20, 1.41s/it] {'loss': 2.7977, 'learning_rate': 1.818812394658369e-07, 'epoch': 2.94} + 18%|█▊ | 11380/61904 [5:26:22<19:44:20, 1.41s/it] 18%|█▊ | 11381/61904 [5:26:24<19:45:56, 1.41s/it] 18%|█▊ | 11382/61904 [5:26:25<19:35:45, 1.40s/it] 18%|█▊ | 11383/61904 [5:26:27<20:00:12, 1.43s/it] 18%|█▊ | 11384/61904 [5:26:28<19:54:24, 1.42s/it] 18%|█▊ | 11385/61904 [5:26:29<19:28:25, 1.39s/it] 18%|█▊ | 11386/61904 [5:26:31<19:38:53, 1.40s/it] 18%|█▊ | 11387/61904 [5:26:32<19:32:21, 1.39s/it] 18%|█▊ | 11388/61904 [5:26:34<19:37:57, 1.40s/it] 18%|█▊ | 11389/61904 [5:26:35<19:48:10, 1.41s/it] 18%|█▊ | 11390/61904 [5:26:37<19:52:41, 1.42s/it] 18%|█▊ | 11391/61904 [5:26:38<19:54:09, 1.42s/it] 18%|█▊ | 11392/61904 [5:26:40<20:22:33, 1.45s/it] 18%|█▊ | 11393/61904 [5:26:41<20:34:22, 1.47s/it] 18%|█▊ | 11394/61904 [5:26:42<19:54:52, 1.42s/it] 18%|█▊ | 11395/61904 [5:26:44<19:35:02, 1.40s/it] 18%|█▊ | 11396/61904 [5:26:45<19:36:58, 1.40s/it] 18%|█▊ | 11397/61904 [5:26:46<19:26:29, 1.39s/it] 18%|█▊ | 11398/61904 [5:26:48<19:30:31, 1.39s/it] 18%|█▊ | 11399/61904 [5:26:49<18:56:29, 1.35s/it] 18%|█▊ | 11400/61904 [5:26:50<19:05:53, 1.36s/it] {'loss': 2.7824, 'learning_rate': 1.8184882665629455e-07, 'epoch': 2.95} + 18%|█▊ | 11400/61904 [5:26:50<19:05:53, 1.36s/it] 18%|█▊ | 11401/61904 [5:26:52<19:15:30, 1.37s/it] 18%|█▊ | 11402/61904 [5:26:53<19:19:38, 1.38s/it] 18%|█▊ | 11403/61904 [5:26:55<20:00:35, 1.43s/it] 18%|█▊ | 11404/61904 [5:26:56<20:03:44, 1.43s/it] 18%|█▊ | 11405/61904 [5:26:58<19:40:24, 1.40s/it] 18%|█▊ | 11406/61904 [5:26:59<19:27:25, 1.39s/it] 18%|█▊ | 11407/61904 [5:27:00<19:32:56, 1.39s/it] 18%|█▊ | 11408/61904 [5:27:02<19:28:20, 1.39s/it] 18%|█▊ | 11409/61904 [5:27:03<19:41:42, 1.40s/it] 18%|█▊ | 11410/61904 [5:27:04<18:46:29, 1.34s/it] 18%|█▊ | 11411/61904 [5:27:06<18:41:09, 1.33s/it] 18%|█▊ | 11412/61904 [5:27:07<19:04:00, 1.36s/it] 18%|█▊ | 11413/61904 [5:27:09<19:24:10, 1.38s/it] 18%|█▊ | 11414/61904 [5:27:10<19:33:26, 1.39s/it] 18%|█▊ | 11415/61904 [5:27:11<19:34:05, 1.40s/it] 18%|█▊ | 11416/61904 [5:27:13<19:22:02, 1.38s/it] 18%|█▊ | 11417/61904 [5:27:14<18:44:50, 1.34s/it] 18%|█▊ | 11418/61904 [5:27:15<18:50:09, 1.34s/it] 18%|█▊ | 11419/61904 [5:27:17<20:02:44, 1.43s/it] 18%|█▊ | 11420/61904 [5:27:18<19:33:21, 1.39s/it] {'loss': 2.7671, 'learning_rate': 1.8181641384675224e-07, 'epoch': 2.95} + 18%|█▊ | 11420/61904 [5:27:18<19:33:21, 1.39s/it] 18%|█▊ | 11421/61904 [5:27:20<19:10:18, 1.37s/it] 18%|█▊ | 11422/61904 [5:27:21<18:57:02, 1.35s/it] 18%|█▊ | 11423/61904 [5:27:22<19:13:15, 1.37s/it] 18%|█▊ | 11424/61904 [5:27:24<19:46:48, 1.41s/it] 18%|█▊ | 11425/61904 [5:27:25<19:19:56, 1.38s/it] 18%|█▊ | 11426/61904 [5:27:26<18:47:20, 1.34s/it] 18%|█▊ | 11427/61904 [5:27:28<18:55:58, 1.35s/it] 18%|█▊ | 11428/61904 [5:27:29<19:35:06, 1.40s/it] 18%|█▊ | 11429/61904 [5:27:31<19:36:00, 1.40s/it] 18%|█▊ | 11430/61904 [5:27:32<19:46:52, 1.41s/it] 18%|█▊ | 11431/61904 [5:27:33<19:07:25, 1.36s/it] 18%|█▊ | 11432/61904 [5:27:35<18:57:07, 1.35s/it] 18%|█▊ | 11433/61904 [5:27:36<19:25:29, 1.39s/it] 18%|█▊ | 11434/61904 [5:27:37<19:08:33, 1.37s/it] 18%|█▊ | 11435/61904 [5:27:39<18:58:46, 1.35s/it] 18%|█▊ | 11436/61904 [5:27:40<18:57:55, 1.35s/it] 18%|█▊ | 11437/61904 [5:27:41<18:59:02, 1.35s/it] 18%|█▊ | 11438/61904 [5:27:43<19:04:56, 1.36s/it] 18%|█▊ | 11439/61904 [5:27:44<20:09:59, 1.44s/it] 18%|█▊ | 11440/61904 [5:27:46<20:02:05, 1.43s/it] {'loss': 2.8553, 'learning_rate': 1.817840010372099e-07, 'epoch': 2.96} + 18%|█▊ | 11440/61904 [5:27:46<20:02:05, 1.43s/it] 18%|█▊ | 11441/61904 [5:27:47<20:22:59, 1.45s/it] 18%|█▊ | 11442/61904 [5:27:49<19:37:48, 1.40s/it] 18%|█▊ | 11443/61904 [5:27:50<19:20:34, 1.38s/it] 18%|█▊ | 11444/61904 [5:27:51<19:46:39, 1.41s/it] 18%|█▊ | 11445/61904 [5:27:53<19:47:10, 1.41s/it] 18%|█▊ | 11446/61904 [5:27:54<20:15:43, 1.45s/it] 18%|█▊ | 11447/61904 [5:27:56<19:56:56, 1.42s/it] 18%|█▊ | 11448/61904 [5:27:57<19:44:40, 1.41s/it] 18%|█▊ | 11449/61904 [5:27:58<18:54:42, 1.35s/it] 18%|█▊ | 11450/61904 [5:28:00<19:41:29, 1.41s/it] 18%|█▊ | 11451/61904 [5:28:01<19:17:15, 1.38s/it] 18%|█▊ | 11452/61904 [5:28:03<19:23:17, 1.38s/it] 19%|█▊ | 11453/61904 [5:28:04<19:35:39, 1.40s/it] 19%|█▊ | 11454/61904 [5:28:06<19:59:33, 1.43s/it] 19%|█▊ | 11455/61904 [5:28:07<19:29:01, 1.39s/it] 19%|█▊ | 11456/61904 [5:28:08<19:03:34, 1.36s/it] 19%|█▊ | 11457/61904 [5:28:09<18:30:51, 1.32s/it] 19%|█▊ | 11458/61904 [5:28:11<18:39:12, 1.33s/it] 19%|█▊ | 11459/61904 [5:28:12<18:48:07, 1.34s/it] 19%|█▊ | 11460/61904 [5:28:14<19:38:48, 1.40s/it] {'loss': 2.7253, 'learning_rate': 1.8175158822766757e-07, 'epoch': 2.96} + 19%|█▊ | 11460/61904 [5:28:14<19:38:48, 1.40s/it] 19%|█▊ | 11461/61904 [5:28:15<19:05:58, 1.36s/it] 19%|█▊ | 11462/61904 [5:28:16<19:01:07, 1.36s/it] 19%|█▊ | 11463/61904 [5:28:17<18:35:45, 1.33s/it] 19%|█▊ | 11464/61904 [5:28:19<18:32:28, 1.32s/it] 19%|█▊ | 11465/61904 [5:28:20<18:23:50, 1.31s/it] 19%|█▊ | 11466/61904 [5:28:21<18:49:43, 1.34s/it] 19%|█▊ | 11467/61904 [5:28:23<18:45:20, 1.34s/it] 19%|█▊ | 11468/61904 [5:28:24<19:00:06, 1.36s/it] 19%|█▊ | 11469/61904 [5:28:26<19:01:49, 1.36s/it] 19%|█▊ | 11470/61904 [5:28:27<19:19:12, 1.38s/it] 19%|█▊ | 11471/61904 [5:28:28<18:53:57, 1.35s/it] 19%|█▊ | 11472/61904 [5:28:30<19:28:42, 1.39s/it] 19%|█▊ | 11473/61904 [5:28:31<19:13:51, 1.37s/it] 19%|█▊ | 11474/61904 [5:28:33<19:30:36, 1.39s/it] 19%|█▊ | 11475/61904 [5:28:34<19:18:39, 1.38s/it] 19%|█▊ | 11476/61904 [5:28:35<20:01:46, 1.43s/it] 19%|█▊ | 11477/61904 [5:28:37<19:37:02, 1.40s/it] 19%|█▊ | 11478/61904 [5:28:38<18:45:35, 1.34s/it] 19%|█▊ | 11479/61904 [5:28:39<18:58:21, 1.35s/it] 19%|█▊ | 11480/61904 [5:28:41<18:58:13, 1.35s/it] {'loss': 2.8338, 'learning_rate': 1.8171917541812525e-07, 'epoch': 2.97} + 19%|█▊ | 11480/61904 [5:28:41<18:58:13, 1.35s/it] 19%|█▊ | 11481/61904 [5:28:42<19:08:13, 1.37s/it] 19%|█▊ | 11482/61904 [5:28:43<18:57:01, 1.35s/it] 19%|█▊ | 11483/61904 [5:28:45<19:05:33, 1.36s/it] 19%|█▊ | 11484/61904 [5:28:46<18:44:07, 1.34s/it] 19%|█▊ | 11485/61904 [5:28:47<18:26:35, 1.32s/it] 19%|█▊ | 11486/61904 [5:28:49<18:36:40, 1.33s/it] 19%|█▊ | 11487/61904 [5:28:50<18:26:11, 1.32s/it] 19%|█▊ | 11488/61904 [5:28:51<18:39:38, 1.33s/it] 19%|█▊ | 11489/61904 [5:28:53<18:57:02, 1.35s/it] 19%|█▊ | 11490/61904 [5:28:54<18:35:46, 1.33s/it] 19%|█▊ | 11491/61904 [5:28:55<18:43:33, 1.34s/it] 19%|█▊ | 11492/61904 [5:28:57<18:55:36, 1.35s/it] 19%|█▊ | 11493/61904 [5:28:58<18:46:17, 1.34s/it] 19%|█▊ | 11494/61904 [5:29:00<19:03:32, 1.36s/it] 19%|█▊ | 11495/61904 [5:29:01<18:38:37, 1.33s/it] 19%|█▊ | 11496/61904 [5:29:02<18:55:56, 1.35s/it] 19%|█▊ | 11497/61904 [5:29:03<18:08:49, 1.30s/it] 19%|█▊ | 11498/61904 [5:29:05<19:21:11, 1.38s/it] 19%|█▊ | 11499/61904 [5:29:06<19:31:01, 1.39s/it] 19%|█▊ | 11500/61904 [5:29:08<19:56:06, 1.42s/it] {'loss': 2.7843, 'learning_rate': 1.816867626085829e-07, 'epoch': 2.97} + 19%|█▊ | 11500/61904 [5:29:08<19:56:06, 1.42s/it] 19%|█▊ | 11501/61904 [5:29:09<19:51:25, 1.42s/it] 19%|█▊ | 11502/61904 [5:29:11<19:41:03, 1.41s/it] 19%|█▊ | 11503/61904 [5:29:12<20:18:35, 1.45s/it] 19%|█▊ | 11504/61904 [5:29:14<19:49:26, 1.42s/it] 19%|█▊ | 11505/61904 [5:29:15<19:11:33, 1.37s/it] 19%|█▊ | 11506/61904 [5:29:16<19:16:20, 1.38s/it] 19%|█▊ | 11507/61904 [5:29:18<19:14:38, 1.37s/it] 19%|█▊ | 11508/61904 [5:29:19<18:49:02, 1.34s/it] 19%|█▊ | 11509/61904 [5:29:20<19:24:25, 1.39s/it] 19%|█▊ | 11510/61904 [5:29:22<18:46:29, 1.34s/it] 19%|█▊ | 11511/61904 [5:29:23<18:57:36, 1.35s/it] 19%|█▊ | 11512/61904 [5:29:24<18:42:40, 1.34s/it] 19%|█▊ | 11513/61904 [5:29:26<19:44:54, 1.41s/it] 19%|█▊ | 11514/61904 [5:29:27<19:27:42, 1.39s/it] 19%|█▊ | 11515/61904 [5:29:29<19:23:00, 1.38s/it] 19%|█▊ | 11516/61904 [5:29:30<19:19:12, 1.38s/it] 19%|█▊ | 11517/61904 [5:29:31<18:26:03, 1.32s/it] 19%|█▊ | 11518/61904 [5:29:32<18:23:35, 1.31s/it] 19%|█▊ | 11519/61904 [5:29:34<18:42:16, 1.34s/it] 19%|█▊ | 11520/61904 [5:29:35<18:49:49, 1.35s/it] {'loss': 2.8181, 'learning_rate': 1.8165434979904058e-07, 'epoch': 2.98} + 19%|█▊ | 11520/61904 [5:29:35<18:49:49, 1.35s/it] 19%|█▊ | 11521/61904 [5:29:37<19:07:12, 1.37s/it] 19%|█▊ | 11522/61904 [5:29:38<19:25:31, 1.39s/it] 19%|█▊ | 11523/61904 [5:29:39<19:01:35, 1.36s/it] 19%|█▊ | 11524/61904 [5:29:40<18:29:12, 1.32s/it] 19%|█▊ | 11525/61904 [5:29:42<19:20:38, 1.38s/it] 19%|█▊ | 11526/61904 [5:29:43<19:01:36, 1.36s/it] 19%|█▊ | 11527/61904 [5:29:45<19:39:32, 1.40s/it] 19%|█▊ | 11528/61904 [5:29:46<19:38:15, 1.40s/it] 19%|█▊ | 11529/61904 [5:29:48<19:50:35, 1.42s/it] 19%|█▊ | 11530/61904 [5:29:49<19:15:23, 1.38s/it] 19%|█▊ | 11531/61904 [5:29:50<18:54:28, 1.35s/it] 19%|█▊ | 11532/61904 [5:29:52<19:09:57, 1.37s/it] 19%|█▊ | 11533/61904 [5:29:53<19:09:38, 1.37s/it] 19%|█▊ | 11534/61904 [5:29:54<19:11:20, 1.37s/it] 19%|█▊ | 11535/61904 [5:29:56<18:42:55, 1.34s/it] 19%|█▊ | 11536/61904 [5:29:57<19:21:56, 1.38s/it] 19%|█▊ | 11537/61904 [5:29:59<19:36:38, 1.40s/it] 19%|█▊ | 11538/61904 [5:30:00<19:42:05, 1.41s/it] 19%|█▊ | 11539/61904 [5:30:01<19:31:29, 1.40s/it] 19%|█▊ | 11540/61904 [5:30:03<19:16:06, 1.38s/it] {'loss': 2.7918, 'learning_rate': 1.8162193698949824e-07, 'epoch': 2.98} + 19%|█▊ | 11540/61904 [5:30:03<19:16:06, 1.38s/it] 19%|█▊ | 11541/61904 [5:30:04<19:07:45, 1.37s/it] 19%|█▊ | 11542/61904 [5:30:06<19:59:29, 1.43s/it] 19%|█▊ | 11543/61904 [5:30:07<19:44:25, 1.41s/it] 19%|█▊ | 11544/61904 [5:30:08<19:12:44, 1.37s/it] 19%|█▊ | 11545/61904 [5:30:10<18:49:06, 1.35s/it] 19%|█▊ | 11546/61904 [5:30:11<18:34:09, 1.33s/it] 19%|█▊ | 11547/61904 [5:30:12<18:40:43, 1.34s/it] 19%|█▊ | 11548/61904 [5:30:14<19:06:24, 1.37s/it] 19%|█▊ | 11549/61904 [5:30:15<18:57:15, 1.36s/it] 19%|█▊ | 11550/61904 [5:30:16<19:14:27, 1.38s/it] 19%|█▊ | 11551/61904 [5:30:18<19:19:56, 1.38s/it] 19%|█▊ | 11552/61904 [5:30:19<19:14:51, 1.38s/it] 19%|█▊ | 11553/61904 [5:30:21<19:58:11, 1.43s/it] 19%|█▊ | 11554/61904 [5:30:22<18:51:14, 1.35s/it] 19%|█▊ | 11555/61904 [5:30:23<18:53:18, 1.35s/it] 19%|█▊ | 11556/61904 [5:30:24<18:20:27, 1.31s/it] 19%|█▊ | 11557/61904 [5:30:26<18:18:46, 1.31s/it] 19%|█▊ | 11558/61904 [5:30:27<18:30:29, 1.32s/it] 19%|█▊ | 11559/61904 [5:30:29<19:05:18, 1.36s/it] 19%|█▊ | 11560/61904 [5:30:30<18:49:57, 1.35s/it] {'loss': 2.8051, 'learning_rate': 1.815895241799559e-07, 'epoch': 2.99} + 19%|█▊ | 11560/61904 [5:30:30<18:49:57, 1.35s/it] 19%|█▊ | 11561/61904 [5:30:31<19:06:56, 1.37s/it] 19%|█▊ | 11562/61904 [5:30:33<19:17:42, 1.38s/it] 19%|█▊ | 11563/61904 [5:30:34<19:34:08, 1.40s/it] 19%|█▊ | 11564/61904 [5:30:36<20:11:21, 1.44s/it] 19%|█▊ | 11565/61904 [5:30:37<21:04:19, 1.51s/it] 19%|█▊ | 11566/61904 [5:30:39<20:11:10, 1.44s/it] 19%|█▊ | 11567/61904 [5:30:40<19:50:05, 1.42s/it] 19%|█▊ | 11568/61904 [5:30:41<19:13:13, 1.37s/it] 19%|█▊ | 11569/61904 [5:30:43<19:40:07, 1.41s/it] 19%|█▊ | 11570/61904 [5:30:44<19:08:28, 1.37s/it] 19%|█▊ | 11571/61904 [5:30:45<18:51:07, 1.35s/it] 19%|█▊ | 11572/61904 [5:30:47<18:42:00, 1.34s/it] 19%|█▊ | 11573/61904 [5:30:48<18:16:24, 1.31s/it] 19%|█▊ | 11574/61904 [5:30:49<18:29:35, 1.32s/it] 19%|█▊ | 11575/61904 [5:30:51<18:39:35, 1.33s/it] 19%|█▊ | 11576/61904 [5:30:52<18:56:09, 1.35s/it] 19%|█▊ | 11577/61904 [5:30:53<18:25:58, 1.32s/it] 19%|█▊ | 11578/61904 [5:30:55<18:28:08, 1.32s/it] 19%|█▊ | 11579/61904 [5:30:56<18:29:51, 1.32s/it] 19%|█▊ | 11580/61904 [5:30:57<18:31:42, 1.33s/it] {'loss': 2.8276, 'learning_rate': 1.815571113704136e-07, 'epoch': 2.99} + 19%|█▊ | 11580/61904 [5:30:57<18:31:42, 1.33s/it] 19%|█▊ | 11581/61904 [5:30:59<18:39:59, 1.34s/it] 19%|█▊ | 11582/61904 [5:31:00<19:21:04, 1.38s/it] 19%|█▊ | 11583/61904 [5:31:02<21:30:21, 1.54s/it] 19%|█▊ | 11584/61904 [5:31:03<20:38:07, 1.48s/it] 19%|█▊ | 11585/61904 [5:31:05<20:54:35, 1.50s/it] 19%|█▊ | 11586/61904 [5:31:06<19:56:48, 1.43s/it] 19%|█▊ | 11587/61904 [5:31:07<19:14:43, 1.38s/it] 19%|█▊ | 11588/61904 [5:31:09<18:52:41, 1.35s/it] 19%|█▊ | 11589/61904 [5:31:10<18:26:49, 1.32s/it] 19%|█▊ | 11590/61904 [5:31:11<18:33:11, 1.33s/it] 19%|█▊ | 11591/61904 [5:31:13<18:34:34, 1.33s/it] 19%|█▊ | 11592/61904 [5:31:14<20:02:51, 1.43s/it] 19%|█▊ | 11593/61904 [5:31:16<19:50:18, 1.42s/it] 19%|█▊ | 11594/61904 [5:31:17<20:07:18, 1.44s/it] 19%|█▊ | 11595/61904 [5:31:18<19:27:35, 1.39s/it] 19%|█▊ | 11596/61904 [5:31:20<18:59:37, 1.36s/it] 19%|█▊ | 11597/61904 [5:31:21<19:40:46, 1.41s/it] 19%|█▊ | 11598/61904 [5:31:22<19:02:26, 1.36s/it] 19%|█▊ | 11599/61904 [5:31:24<18:35:39, 1.33s/it] 19%|█▊ | 11600/61904 [5:31:25<18:42:04, 1.34s/it] {'loss': 2.8109, 'learning_rate': 1.8152469856087125e-07, 'epoch': 3.0} + 19%|█▊ | 11600/61904 [5:31:25<18:42:04, 1.34s/it] 19%|█▊ | 11601/61904 [5:31:26<18:38:19, 1.33s/it] 19%|█▊ | 11602/61904 [5:31:28<18:34:03, 1.33s/it] 19%|█▊ | 11603/61904 [5:31:29<18:44:09, 1.34s/it] 19%|█▊ | 11604/61904 [5:31:30<18:28:21, 1.32s/it] 19%|█▊ | 11605/61904 [5:31:32<18:38:11, 1.33s/it] 19%|█▊ | 11606/61904 [5:31:33<18:25:34, 1.32s/it] 19%|█▉ | 11607/61904 [5:31:34<18:43:32, 1.34s/it] 19%|█▉ | 11608/61904 [5:31:36<18:28:34, 1.32s/it]Generation Kwargs: +{'max_length': 384, 'max_gen_length': 380, 'num_beams': 5} + + 0%| | 0/861 [00:00> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 19%|█▉ | 11609/61904 [6:04:18<8237:10:56, 589.60s/it] 19%|█▉ | 11610/61904 [6:04:20<5773:02:43, 413.23s/it] 19%|█▉ | 11611/61904 [6:04:21<4046:58:49, 289.69s/it] 19%|█▉ | 11612/61904 [6:04:22<2838:21:34, 203.18s/it] 19%|█▉ | 11613/61904 [6:04:24<1992:53:35, 142.66s/it] 19%|█▉ | 11614/61904 [6:04:25<1401:03:08, 100.29s/it] 19%|█▉ | 11615/61904 [6:04:27<986:15:33, 70.60s/it] 19%|█▉ | 11616/61904 [6:04:28<696:06:03, 49.83s/it] 19%|█▉ | 11617/61904 [6:04:29<492:53:12, 35.29s/it] 19%|█▉ | 11618/61904 [6:04:31<350:58:48, 25.13s/it] 19%|█▉ | 11619/61904 [6:04:32<251:37:02, 18.01s/it] 19%|█▉ | 11620/61904 [6:04:33<181:18:10, 12.98s/it] {'loss': 2.8225, 'learning_rate': 1.8149228575132891e-07, 'epoch': 3.0} + 19%|█▉ | 11620/61904 [6:04:33<181:18:10, 12.98s/it] 19%|█▉ | 11621/61904 [6:04:35<133:00:31, 9.52s/it] 19%|█▉ | 11622/61904 [6:04:37<100:07:02, 7.17s/it] 19%|█▉ | 11623/61904 [6:04:38<75:58:30, 5.44s/it] 19%|█▉ | 11624/61904 [6:04:39<58:47:41, 4.21s/it] 19%|█▉ | 11625/61904 [6:04:41<46:56:52, 3.36s/it] 19%|█▉ | 11626/61904 [6:04:42<38:49:45, 2.78s/it] 19%|█▉ | 11627/61904 [6:04:44<33:38:10, 2.41s/it] 19%|█▉ | 11628/61904 [6:04:45<28:46:00, 2.06s/it] 19%|█▉ | 11629/61904 [6:04:46<25:49:53, 1.85s/it] 19%|█▉ | 11630/61904 [6:04:48<23:55:02, 1.71s/it] 19%|█▉ | 11631/61904 [6:04:49<22:23:59, 1.60s/it] 19%|█▉ | 11632/61904 [6:04:50<21:46:45, 1.56s/it] 19%|█▉ | 11633/61904 [6:04:52<21:23:28, 1.53s/it] 19%|█▉ | 11634/61904 [6:04:53<20:32:33, 1.47s/it] 19%|█▉ | 11635/61904 [6:04:55<19:51:28, 1.42s/it] 19%|█▉ | 11636/61904 [6:04:56<19:26:14, 1.39s/it] 19%|█▉ | 11637/61904 [6:04:57<19:18:33, 1.38s/it] 19%|█▉ | 11638/61904 [6:04:59<19:11:37, 1.37s/it] 19%|█▉ | 11639/61904 [6:05:00<19:44:00, 1.41s/it] 19%|█▉ | 11640/61904 [6:05:01<19:19:00, 1.38s/it] {'loss': 2.7593, 'learning_rate': 1.814598729417866e-07, 'epoch': 3.01} + 19%|█▉ | 11640/61904 [6:05:01<19:19:00, 1.38s/it] 19%|█▉ | 11641/61904 [6:05:03<19:14:00, 1.38s/it] 19%|█▉ | 11642/61904 [6:05:04<19:33:04, 1.40s/it] 19%|█▉ | 11643/61904 [6:05:06<19:11:00, 1.37s/it] 19%|█▉ | 11644/61904 [6:05:07<19:01:33, 1.36s/it] 19%|█▉ | 11645/61904 [6:05:08<19:07:31, 1.37s/it] 19%|█▉ | 11646/61904 [6:05:10<19:33:26, 1.40s/it] 19%|█▉ | 11647/61904 [6:05:11<20:44:42, 1.49s/it] \ No newline at end of file