diff --git "a/wandb/run-20220228_204859-8xn2plkx/files/output.log" "b/wandb/run-20220228_204859-8xn2plkx/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220228_204859-8xn2plkx/files/output.log" @@ -0,0 +1,2457 @@ + + + 0%| | 0/297 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:06,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:09,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:11,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:14,499 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:17,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:19,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7631, 'learning_rate': 2e-08, 'epoch': 0.0} + + 0%|▎ | 1/297 [00:22<1:49:11, 22.13s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:49:25,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:28,567 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:31,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:33,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:36,256 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:38,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:41,440 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8416, 'learning_rate': 4e-08, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:44,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 2/297 [00:43<1:46:27, 21.65s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:49:46,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:49,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:51,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:54,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:56,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:49:59,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:02,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9134, 'learning_rate': 6.000000000000001e-08, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:04,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▊ | 3/297 [01:03<1:43:33, 21.14s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:50:07,230 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:09,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:12,231 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:14,783 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:17,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:19,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:22,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.866, 'learning_rate': 8e-08, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:24,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█ | 4/297 [01:24<1:41:32, 20.79s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:50:27,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:29,833 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:32,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:34,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:37,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:39,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:42,273 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:44,721 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7491, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.02} + 2%|█▎ | 5/297 [01:44<1:39:37, 20.47s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:50:47,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:49,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:52,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:54,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:57,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:50:59,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:02,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:04,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 6/297 [02:03<1:38:06, 20.23s/it] + + 2%|█▋ | 6/297 [02:03<1:38:06, 20.23s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:51:07,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:09,510 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:11,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:14,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:16,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:19,275 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:21,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:24,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.865, 'learning_rate': 1.2000000000000002e-07, 'epoch': 0.02} + + 2%|█▉ | 7/297 [02:23<1:36:58, 20.06s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:51:26,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:29,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:31,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:34,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:36,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:39,020 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:41,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:43,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 8/297 [02:43<1:36:02, 19.94s/it] + + 3%|██▏ | 8/297 [02:43<1:36:02, 19.94s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:51:46,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:48,828 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:51,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:53,562 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:56,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:51:58,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:00,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:03,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 9/297 [03:02<1:34:52, 19.76s/it] + + 3%|██▍ | 9/297 [03:02<1:34:52, 19.76s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:52:05,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:08,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:10,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:13,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:15,445 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:17,809 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:20,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7175, 'learning_rate': 1.8e-07, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:22,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▋ | 10/297 [03:21<1:33:49, 19.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:52:25,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:27,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:29,680 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:32,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:34,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:36,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:39,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:41,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 11/297 [03:40<1:32:34, 19.42s/it] + + 4%|██▉ | 11/297 [03:40<1:32:34, 19.42s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:52:44,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:46,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:48,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:51,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:53,422 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:55,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:52:58,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:00,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 12/297 [03:59<1:31:34, 19.28s/it] + + 4%|███▏ | 12/297 [03:59<1:31:34, 19.28s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:53:02,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:05,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:07,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:10,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:12,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:15,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:17,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8214, 'learning_rate': 2.4000000000000003e-07, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:19,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 13/297 [04:19<1:31:25, 19.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 13/297 [04:19<1:31:25, 19.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:27,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:27,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:31,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:31,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:36,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:38<1:30:36, 19.21s/it]g-point operations will not be computed-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:38<1:30:36, 19.21s/it]g-point operations will not be computed-28 20:53:22,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:38<1:30:36, 19.21s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:38<1:30:36, 19.21s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:45,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:45,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:50,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:50,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:55,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:53:55,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [04:56<1:29:22, 19.02s/it]g-point operations will not be computed-28 20:53:41,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [04:56<1:29:22, 19.02s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [04:56<1:29:22, 19.02s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:04,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:04,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:09,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:09,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:13,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:13,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:15<1:28:19, 18.86s/it]g-point operations will not be computed-28 20:53:59,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:15<1:28:19, 18.86s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:15<1:28:19, 18.86s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:22,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:22,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:27,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:27,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:31,992 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:31,992 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:33<1:27:24, 18.73s/it]g-point operations will not be computed-28 20:54:18,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:33<1:27:24, 18.73s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:33<1:27:24, 18.73s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:41,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:41,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:45,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:45,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:50,234 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:22, 18.58s/it]g-point operations will not be computed-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:22, 18.58s/it]g-point operations will not be computed-28 20:54:36,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:22, 18.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:22, 18.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:59,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:54:59,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:03,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:03,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:08,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:36, 18.48s/it]g-point operations will not be computed-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:36, 18.48s/it]g-point operations will not be computed-28 20:54:54,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:36, 18.48s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:36, 18.48s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:17,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:17,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:22,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:22,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:26,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:28<1:24:59, 18.41s/it]g-point operations will not be computed-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:28<1:24:59, 18.41s/it]g-point operations will not be computed-28 20:55:13,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:28<1:24:59, 18.41s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:28<1:24:59, 18.41s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:35,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:35,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:40,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:40,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:44,780 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:44,780 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:46<1:24:04, 18.28s/it]g-point operations will not be computed-28 20:55:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:46<1:24:04, 18.28s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:46<1:24:04, 18.28s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:53,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:53,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:58,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:55:58,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:02,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:04<1:23:06, 18.13s/it]g-point operations will not be computed-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:04<1:23:06, 18.13s/it]g-point operations will not be computed-28 20:55:49,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:04<1:23:06, 18.13s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:04<1:23:06, 18.13s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:11,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:11,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:15,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:15,958 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:20,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:20,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:21<1:22:17, 18.02s/it]g-point operations will not be computed-28 20:56:07,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:21<1:22:17, 18.02s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:21<1:22:17, 18.02s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:29,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:29,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:33,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:33,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:37,898 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:17, 17.87s/it]g-point operations will not be computed-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:17, 17.87s/it]g-point operations will not be computed-28 20:56:24,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:17, 17.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:17, 17.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:46,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:46,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:50,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:50,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:55,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:56:55,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [07:57<1:21:03, 17.88s/it]g-point operations will not be computed-28 20:56:42,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [07:57<1:21:03, 17.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [07:57<1:21:03, 17.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:04,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:04,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:04,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:04,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:04,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:04,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:05, 17.73s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:05, 17.73s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:21,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:21,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:21,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:21,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:57:21,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7982, 'learning_rate': 5.2e-07, 'epoch': 0.09} + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:02, 17.57s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:17:59, 17.40s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:17:59, 17.40s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8116, 'learning_rate': 5.4e-07, 'epoch': 0.09} + 9%|███████▌ | 28/297 [08:48<1:17:59, 17.40s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:17:59, 17.40s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:17:59, 17.40s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:17:59, 17.40s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:17:59, 17.40s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7091, 'learning_rate': 5.6e-07, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:58:06,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8633, 'learning_rate': 5.800000000000001e-07, 'epoch': 0.1} + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:07, 17.11s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:01, 16.92s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:01, 16.76s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:11<1:13:07, 16.62s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:48, 16.38s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:31, 16.15s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:31, 16.15s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:31, 16.15s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:31, 16.15s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:31, 16.15s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:31, 16.15s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:59:57,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 20:59:57,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:09, 15.90s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:14<1:08:36, 15.83s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:30<1:08:13, 15.80s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:30<1:08:13, 15.80s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:30<1:08:13, 15.80s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:30<1:08:13, 15.80s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:30<1:08:13, 15.80s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:00:41,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:00:41,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:00:41,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:44<1:06:46, 15.53s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:44<1:06:46, 15.53s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:44<1:06:46, 15.53s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:44<1:06:46, 15.53s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:44<1:06:46, 15.53s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:00:56,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:00:56,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:00:56,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 40/297 [11:59<1:05:03, 15.19s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 40/297 [11:59<1:05:03, 15.19s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 40/297 [11:59<1:05:03, 15.19s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:06,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:06,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:06,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:06,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:06,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 41/297 [12:12<1:02:32, 14.66s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:16,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:16,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:16,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:16,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:24,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:24,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 42/297 [12:25<59:52, 14.09s/it]g-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:29,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:29,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:29,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:29,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:36,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:36,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 20:57:00,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▊ | 43/297 [12:36<56:24, 13.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:01:38,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▊ | 43/297 [12:36<56:24, 13.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:01:38,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:42,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:38,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:42,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:38,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:46,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:38,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:46,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:38,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 44/297 [12:47<52:31, 12.46s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:01:49,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 44/297 [12:47<52:31, 12.46s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:01:49,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:52,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:49,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:55,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:49,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:55,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:49,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:01:55,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:49,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▍ | 45/297 [12:56<48:36, 11.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:01:58,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:00,897 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:58,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:02,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:58,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:05,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:58,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:05,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:01:58,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▋ | 46/297 [13:05<44:33, 10.65s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:02:07,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:09,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:07,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:10,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:07,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:12,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:07,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:12,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:07,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 47/297 [13:13<40:36, 9.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:02:14,673 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:16,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:14,673 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:18,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:14,673 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▎ | 48/297 [13:19<36:50, 8.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:02:21,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▎ | 48/297 [13:19<36:50, 8.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:02:21,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:22,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:21,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:25,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:21,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:25,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:21,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▌ | 49/297 [13:25<32:55, 7.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:02:27,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:28,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:27,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:31,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:27,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:31,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:27,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 50/297 [13:31<30:10, 7.33s/it]g-point operations will not be computed-28 21:02:27,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 50/297 [13:31<30:10, 7.33s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 50/297 [13:31<30:10, 7.33s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:40,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:40,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:46,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:46,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:02:46,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6702, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.17} + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 51/297 [13:53<48:07, 11.74s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5504, 'learning_rate': 1.02e-06, 'epoch': 0.17} + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 52/297 [14:15<1:00:00, 14.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5079, 'learning_rate': 1.04e-06, 'epoch': 0.18} + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:36<1:07:31, 16.60s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4589, 'learning_rate': 1.08e-06, 'epoch': 0.18} + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|████████████���█▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:57<1:12:37, 17.93s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4616, 'learning_rate': 1.1e-06, 'epoch': 0.19} + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:39<1:18:13, 19.47s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6109, 'learning_rate': 1.12e-06, 'epoch': 0.19} + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:59<1:18:21, 19.59s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6348, 'learning_rate': 1.14e-06, 'epoch': 0.2} + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:18<1:18:19, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6056, 'learning_rate': 1.1600000000000001e-06, 'epoch': 0.2} + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:38<1:18:02, 19.67s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:17:39, 19.66s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:16:49, 19.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4502, 'learning_rate': 1.2200000000000002e-06, 'epoch': 0.21} + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:16:11, 19.45s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.584, 'learning_rate': 1.2400000000000002e-06, 'epoch': 0.21} + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:16:12, 19.54s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5269, 'learning_rate': 1.26e-06, 'epoch': 0.22} + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:15:28, 19.44s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.603, 'learning_rate': 1.28e-06, 'epoch': 0.22} + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:14:27, 19.26s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:13:52, 19.19s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:13:52, 19.19s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.583, 'learning_rate': 1.3e-06, 'epoch': 0.22} + 22%|█████████████████▊ | 66/297 [18:53<1:13:52, 19.19s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:13:52, 19.19s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:13:52, 19.19s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:13:52, 19.19s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:13:52, 19.19s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.548, 'learning_rate': 1.32e-06, 'epoch': 0.23} +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:08:12,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██��███████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5167, 'learning_rate': 1.3600000000000001e-06, 'epoch': 0.23} + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:12:04, 18.88s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:07<1:10:31, 18.64s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4493, 'learning_rate': 1.4000000000000001e-06, 'epoch': 0.24} + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5737, 'learning_rate': 1.42e-06, 'epoch': 0.24} + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:43<1:08:51, 18.36s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:01<1:07:56, 18.20s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4769, 'learning_rate': 1.46e-06, 'epoch': 0.25} + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:19<1:07:19, 18.11s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5744, 'learning_rate': 1.48e-06, 'epoch': 0.25} + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:37<1:07:03, 18.12s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4586, 'learning_rate': 1.5e-06, 'epoch': 0.26} + g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:11:02,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:11:02,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:11:02,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:11:02,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:11:02,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4262, 'learning_rate': 1.52e-06, 'epoch': 0.26} + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:12<1:04:54, 17.70s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4633, 'learning_rate': 1.54e-06, 'epoch': 0.26} + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:29<1:03:59, 17.53s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:46<1:03:02, 17.35s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5469, 'learning_rate': 1.5800000000000001e-06, 'epoch': 0.27} + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:03<1:02:20, 17.24s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4678, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.27} + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:20<1:01:26, 17.07s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:36<1:00:31, 16.89s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4408, 'learning_rate': 1.6400000000000002e-06, 'epoch': 0.28} + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 83/297 [23:53<1:00:26, 16.95s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:09<59:07, 16.65s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:25<57:52, 16.38s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:40<56:28, 16.06s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:40<56:28, 16.06s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:13:46,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:13:46,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:13:46,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:13:46,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:13:46,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:13:46,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:55<55:02, 15.72s/it]g-point operations will not be computed-28 21:02:35,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:10<54:13, 15.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:10<54:13, 15.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:10<54:13, 15.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:10<54:13, 15.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:10<54:13, 15.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:23,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:24<52:25, 15.12s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:24<52:25, 15.12s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.542, 'learning_rate': 1.76e-06, 'epoch': 0.3} + 30%|████████████████████████▌ | 89/297 [25:24<52:25, 15.12s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:24<52:25, 15.12s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:34,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:34,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:34,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:34,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 90/297 [25:38<50:40, 14.69s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 90/297 [25:38<50:40, 14.69s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:44,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:44,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:44,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:44,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:51,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:51,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5876, 'learning_rate': 1.8000000000000001e-06, 'epoch': 0.31} +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:56,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:56,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:14:56,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:02,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▍ | 92/297 [26:03<45:50, 13.42s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▍ | 92/297 [26:03<45:50, 13.42s/it]g-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:06,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:06,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:10,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:10,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:10,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:10,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:14:13,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▋ | 93/297 [26:14<43:17, 12.73s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:15:16,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:18,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:16,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:18,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:16,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:22,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:16,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:22,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:16,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:22,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:16,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 94/297 [26:24<40:36, 12.00s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:28,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:28,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:32,422 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:34,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:34,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:37,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:39,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:41,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:43,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:43,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:45,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:47,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:48,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:52,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:54,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:57,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:57,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:15:58,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:01,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:02,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:02,719 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:05,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:06,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:06,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:08,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:08,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:13,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:13,683 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:18,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:18,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:24,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:16:24,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3301, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.34} + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:28<37:00, 11.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:49<46:00, 14.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:09<51:46, 16.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:30<55:42, 17.32s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.336, 'learning_rate': 2.1000000000000002e-06, 'epoch': 0.36} + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:50<58:05, 18.15s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2744, 'learning_rate': 2.12e-06, 'epoch': 0.36} + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 107/297 [29:30<1:00:11, 19.01s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3794, 'learning_rate': 2.1400000000000003e-06, 'epoch': 0.36} + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 108/297 [29:49<1:00:37, 19.25s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3837, 'learning_rate': 2.16e-06, 'epoch': 0.37} + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:09<1:00:30, 19.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3399, 'learning_rate': 2.1800000000000003e-06, 'epoch': 0.37} + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 110/297 [30:28<1:00:07, 19.29s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:47<59:37, 19.24s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.354, 'learning_rate': 2.2200000000000003e-06, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:06<59:05, 19.17s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2735, 'learning_rate': 2.24e-06, 'epoch': 0.38} + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [31:26<59:07, 19.28s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.305, 'learning_rate': 2.28e-06, 'epoch': 0.39} +[WARNING|modeling_utils.py:388] 2022-02-28 21:21:09,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:21:09,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:21:09,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:21:09,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:21:09,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:21:09,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3163, 'learning_rate': 2.3000000000000004e-06, 'epoch': 0.39} + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:22<56:59, 18.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2756, 'learning_rate': 2.3200000000000002e-06, 'epoch': 0.39} + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:40<56:15, 18.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.332, 'learning_rate': 2.3400000000000005e-06, 'epoch': 0.4} + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:59<55:38, 18.65s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [33:17<54:55, 18.51s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:35<54:18, 18.41s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████���███████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:53<53:46, 18.33s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:11<53:05, 18.20s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:11<53:05, 18.20s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4307, 'learning_rate': 2.42e-06, 'epoch': 0.41} + 41%|█████████████████████████████████▎ | 122/297 [34:11<53:05, 18.20s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:11<53:05, 18.20s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:23:23,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:23:23,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:23:23,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2447, 'learning_rate': 2.4400000000000004e-06, 'epoch': 0.41} + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:29<52:17, 18.03s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3872, 'learning_rate': 2.46e-06, 'epoch': 0.42} + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:46<51:37, 17.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [35:04<51:17, 17.89s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3551, 'learning_rate': 2.5e-06, 'epoch': 0.42} + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:22<50:37, 17.76s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4112, 'learning_rate': 2.52e-06, 'epoch': 0.43} +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2566, 'learning_rate': 2.5400000000000002e-06, 'epoch': 0.43} +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:24:44,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:25:05,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:25:05,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:25:05,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:25:05,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3964, 'learning_rate': 2.56e-06, 'epoch': 0.43} + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:13<48:27, 17.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4793, 'learning_rate': 2.5800000000000003e-06, 'epoch': 0.44} + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|█████���█████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:30<47:36, 17.11s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:46<46:52, 16.94s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [37:03<46:02, 16.74s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|█��██████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:19<45:22, 16.60s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:34<44:16, 16.30s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:50<43:25, 16.08s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:26:54,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:26:54,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:26:54,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:26:54,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:26:54,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:26:54,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:26:54,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [38:05<42:24, 15.81s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [38:05<42:24, 15.81s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [38:05<42:24, 15.81s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [38:05<42:24, 15.81s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [38:05<42:24, 15.81s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [38:05<42:24, 15.81s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [38:05<42:24, 15.81s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3289, 'learning_rate': 2.7200000000000002e-06, 'epoch': 0.46} +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:21,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 138/297 [38:35<40:34, 15.31s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:39,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:39,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:39,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:39,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:39,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:39,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:39,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 139/297 [38:49<39:15, 14.91s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:53,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:53,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:53,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:27:53,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:01,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:01,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 140/297 [39:02<37:47, 14.44s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 140/297 [39:02<37:47, 14.44s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 140/297 [39:02<37:47, 14.44s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:09,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:09,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:09,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:15,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:15,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5266, 'learning_rate': 2.8000000000000003e-06, 'epoch': 0.47} +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:15,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:21,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:21,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:26,287 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 142/297 [39:27<34:16, 13.27s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 142/297 [39:27<34:16, 13.27s/it]g-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2802, 'learning_rate': 2.82e-06, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:31,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:31,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:36,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:36,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:36,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:15:26,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████ | 143/297 [39:38<32:19, 12.60s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:28:40,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████ | 143/297 [39:38<32:19, 12.60s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:28:40,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:43,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:40,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:46,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:40,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:46,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:40,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:46,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:40,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▎ | 144/297 [39:48<30:14, 11.86s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:52,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:52,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:56,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:58,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:28:58,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:02,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:04,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:06,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:06,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:08,528 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:10,361 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:12,133 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:13,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:13,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:15,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:18,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:20,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:20,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:23,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:24,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:26,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:26,979 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:29,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:30,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:30,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7617, 'learning_rate': 2.9800000000000003e-06, 'epoch': 0.5} +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:36,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:36,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:41,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:41,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:46,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:46,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:46,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:29:51,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [41:11<33:18, 13.78s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:31<37:28, 15.61s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.263, 'learning_rate': 3.0600000000000003e-06, 'epoch': 0.52} + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2973, 'learning_rate': 3.08e-06, 'epoch': 0.52} + 52%|██████████████████████████████████████████▎ | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████��� | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:10<41:54, 17.71s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3268, 'learning_rate': 3.1000000000000004e-06, 'epoch': 0.52} + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:30<42:58, 18.29s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2318, 'learning_rate': 3.12e-06, 'epoch': 0.53} + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3031, 'learning_rate': 3.1400000000000004e-06, 'epoch': 0.53} + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:49<43:29, 18.64s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<43:38, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:47<43:19, 18.98s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2754, 'learning_rate': 3.2000000000000003e-06, 'epoch': 0.54} + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<42:54, 18.93s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2627, 'learning_rate': 3.2200000000000005e-06, 'epoch': 0.54} + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:24<42:28, 18.88s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3161, 'learning_rate': 3.2400000000000003e-06, 'epoch': 0.55} + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2449, 'learning_rate': 3.2600000000000006e-06, 'epoch': 0.55} + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:43<42:15, 18.92s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|██████████████████████████████���██████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:20<41:01, 18.65s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2734, 'learning_rate': 3.3000000000000006e-06, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:39<40:29, 18.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2985, 'learning_rate': 3.3200000000000004e-06, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:57<40:03, 18.49s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2804, 'learning_rate': 3.3400000000000006e-06, 'epoch': 0.56} + 57%|█████████████████████████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████���███████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:15<39:35, 18.42s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2491, 'learning_rate': 3.3600000000000004e-06, 'epoch': 0.57} + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:33<39:06, 18.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1827, 'learning_rate': 3.4000000000000005e-06, 'epoch': 0.58} + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▎ | 170/297 [46:51<38:39, 18.27s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2962, 'learning_rate': 3.4200000000000007e-06, 'epoch': 0.58} + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:27<37:35, 18.04s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:45<37:01, 17.91s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:02<36:27, 17.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:20<36:18, 17.85s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:38<35:41, 17.70s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3487, 'learning_rate': 3.52e-06, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:55<35:04, 17.54s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2096, 'learning_rate': 3.54e-06, 'epoch': 0.6} + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:38:21,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:38:21,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:38:21,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:38:21,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:38:21,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:29<33:52, 17.23s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3968, 'learning_rate': 3.58e-06, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:45<33:15, 17.06s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2574, 'learning_rate': 3.6000000000000003e-06, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:02<32:46, 16.95s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.211, 'learning_rate': 3.62e-06, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:18<32:10, 16.79s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:34<31:30, 16.58s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:50<30:45, 16.33s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:06<30:01, 16.08s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:06<30:01, 16.08s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:06<30:01, 16.08s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:06<30:01, 16.08s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:40:16,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:40:16,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:40:16,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:40:16,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████��███████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [51:21<29:13, 15.80s/it]g-point operations will not be computed-28 21:28:50,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:36<28:35, 15.59s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:51<28:01, 15.42s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:51<28:01, 15.42s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:51<28:01, 15.42s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:51<28:01, 15.42s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:00,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:00,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:00,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:00,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 189/297 [52:05<27:06, 15.06s/it]g-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 189/297 [52:05<27:06, 15.06s/it]g-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:11,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:11,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:11,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:11,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:19,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:19,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3319, 'learning_rate': 3.7800000000000002e-06, 'epoch': 0.64} +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:19,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:25,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:25,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:25,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:32,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:32,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3619, 'learning_rate': 3.8000000000000005e-06, 'epoch': 0.64} +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:32,035 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:38,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:38,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:38,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:43,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:43,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4395, 'learning_rate': 3.820000000000001e-06, 'epoch': 0.65} +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:48,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:48,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:52,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:54,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:54,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5067, 'learning_rate': 3.8400000000000005e-06, 'epoch': 0.65} +[WARNING|modeling_utils.py:388] 2022-02-28 21:41:58,404 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:00,796 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:00,796 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:04,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:04,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:06,686 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:08,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:11,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:13,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:13,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4093, 'learning_rate': 3.88e-06, 'epoch': 0.66} +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:16,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:18,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 196/297 [53:20<16:42, 9.93s/it]g-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 196/297 [53:20<16:42, 9.93s/it]g-point operations will not be computed-28 21:40:38,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 196/297 [53:20<16:42, 9.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:22,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:23,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:22,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:27,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:22,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 197/297 [53:27<15:07, 9.08s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:28,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 197/297 [53:27<15:07, 9.08s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:28,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:30,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:28,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:32,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:28,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 198/297 [53:33<13:36, 8.25s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:35,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 198/297 [53:33<13:36, 8.25s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:35,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:36,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:35,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:39,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:35,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 199/297 [53:39<12:06, 7.42s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:40,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 199/297 [53:39<12:06, 7.42s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:40,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:42,850 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:40,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:44<10:52, 6.72s/it]g-point operations will not be computed-28 21:42:40,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:44<10:52, 6.72s/it]g-point operations will not be computed-28 21:42:40,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:44<10:52, 6.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:44<10:52, 6.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:53,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:53,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:58,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:42:58,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:43:03,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it]g-point operations will not be computed-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it]g-point operations will not be computed-28 21:42:48,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2594, 'learning_rate': 4.0200000000000005e-06, 'epoch': 0.68} + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:05<17:45, 11.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2421, 'learning_rate': 4.04e-06, 'epoch': 0.68} + 68%|██████████████████████████████��████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:46<24:42, 15.78s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1647, 'learning_rate': 4.060000000000001e-06, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:06<26:20, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:25<27:17, 17.80s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2301, 'learning_rate': 4.1e-06, 'epoch': 0.69} + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2757, 'learning_rate': 4.12e-06, 'epoch': 0.7} + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:45<27:45, 18.30s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:23<27:54, 18.82s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:42<27:41, 18.88s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:43:08,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1103, 'learning_rate': 4.2000000000000004e-06, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:02<27:29, 18.96s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2436, 'learning_rate': 4.22e-06, 'epoch': 0.71} + 71%|███████████████████████████���█████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:39<26:45, 18.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2657, 'learning_rate': 4.24e-06, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:58<26:34, 18.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2639, 'learning_rate': 4.26e-06, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:17<26:05, 18.87s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:35<25:37, 18.75s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [58:54<25:07, 18.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:12<24:41, 18.52s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1902, 'learning_rate': 4.34e-06, 'epoch': 0.73} + 73%|█████████████��█████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:30<24:08, 18.34s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1969, 'learning_rate': 4.360000000000001e-06, 'epoch': 0.74} + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:48<23:39, 18.20s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:06<23:17, 18.15s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:24<22:53, 18.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:24<22:53, 18.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1718, 'learning_rate': 4.4e-06, 'epoch': 0.74} + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:24<22:53, 18.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:24<22:53, 18.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:24<22:53, 18.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:49:38,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:49:38,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1763, 'learning_rate': 4.42e-06, 'epoch': 0.75} + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:42<22:29, 17.99s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1954, 'learning_rate': 4.440000000000001e-06, 'epoch': 0.75} + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:00:59<22:04, 17.90s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:01:17<21:41, 17.83s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:35<21:22, 17.81s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:35<21:22, 17.81s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2145, 'learning_rate': 4.48e-06, 'epoch': 0.76} + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:35<21:22, 17.81s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:35<21:22, 17.81s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:44,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:44,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:44,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2234, 'learning_rate': 4.5e-06, 'epoch': 0.76} +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:50:53,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2594, 'learning_rate': 4.520000000000001e-06, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:09<20:22, 17.46s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1951, 'learning_rate': 4.540000000000001e-06, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:26<19:54, 17.31s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:43<19:29, 17.20s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:43<19:29, 17.20s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2091, 'learning_rate': 4.56e-06, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:43<19:29, 17.20s/it]g-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:51:52,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:51:52,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:51:52,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:51:52,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:51:52,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:46:05,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:00<19:05, 17.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:00<19:05, 17.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:00<19:05, 17.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:00<19:05, 17.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:00<19:05, 17.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:00<19:05, 17.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:00<19:05, 17.09s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3193, 'learning_rate': 4.600000000000001e-06, 'epoch': 0.78} + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:16<18:36, 16.92s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1905, 'learning_rate': 4.620000000000001e-06, 'epoch': 0.78} + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:33<18:06, 16.72s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:49<17:40, 16.57s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:05<17:12, 16.39s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:20<16:39, 16.13s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:20<16:39, 16.13s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:20<16:39, 16.13s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:20<16:39, 16.13s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:53:30,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:53:30,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:53:30,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1503, 'learning_rate': 4.7e-06, 'epoch': 0.79} + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:36<16:09, 15.89s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:04:51<15:40, 15.68s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:04:51<15:40, 15.68s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:04:51<15:40, 15.68s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:04:51<15:40, 15.68s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|████████████████████████████���██████████████████████████████████ | 237/297 [1:04:51<15:40, 15.68s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:04:51<15:40, 15.68s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:05,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:06<15:17, 15.54s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:06<15:17, 15.54s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3807, 'learning_rate': 4.74e-06, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:06<15:17, 15.54s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:06<15:17, 15.54s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:06<15:17, 15.54s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:06<15:17, 15.54s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:19,552 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:05:20<14:38, 15.15s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:05:20<14:38, 15.15s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2229, 'learning_rate': 4.76e-06, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:05:20<14:38, 15.15s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:05:20<14:38, 15.15s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:29,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:29,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:29,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:29,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 240/297 [1:05:34<13:57, 14.69s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:38,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:38,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:38,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:38,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:46,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 241/297 [1:05:47<13:15, 14.20s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 241/297 [1:05:47<13:15, 14.20s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3808, 'learning_rate': 4.800000000000001e-06, 'epoch': 0.81} +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:52,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:52,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:52,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:54:58,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 242/297 [1:05:59<12:29, 13.63s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 242/297 [1:05:59<12:29, 13.63s/it]g-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3217, 'learning_rate': 4.8200000000000004e-06, 'epoch': 0.81} +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:04,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:04,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:08,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:08,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:08,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:52:02,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 243/297 [1:06:11<11:40, 12.97s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:13,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 243/297 [1:06:11<11:40, 12.97s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:13,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:17,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:13,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:17,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:13,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:20,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:13,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 244/297 [1:06:21<10:47, 12.21s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:23,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 244/297 [1:06:21<10:47, 12.21s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:23,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2872, 'learning_rate': 4.86e-06, 'epoch': 0.82} +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:27,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:23,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:23,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:23,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:29,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:23,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|█████████████████████████████████████████████████████████████████▏ | 245/297 [1:06:31<09:53, 11.40s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:32,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:35,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:32,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:37,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:32,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:39,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:32,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 246/297 [1:06:39<08:58, 10.56s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:41,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 246/297 [1:06:39<08:58, 10.56s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:41,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:43,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:41,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:45,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:41,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:46,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:41,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:46,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:41,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 247/297 [1:06:47<08:03, 9.67s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:48,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:50,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:48,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:52,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:48,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|█████████████████████████████████████████████████████████████████▉ | 248/297 [1:06:53<07:09, 8.77s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:55,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|█████████████████████████████████████████████████████████████████▉ | 248/297 [1:06:53<07:09, 8.77s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:55:55,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:56,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:55,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:59,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:55,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:55:59,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:55:55,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▏ | 249/297 [1:06:59<06:14, 7.79s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:56:00,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:56:02,998 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:00,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:07:04<05:29, 7.00s/it]g-point operations will not be computed-28 21:56:00,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:07:04<05:29, 7.00s/it]g-point operations will not be computed-28 21:56:00,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:07:04<05:29, 7.00s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:07:04<05:29, 7.00s/it][WARNING|modeling_utils.py:388] 2022-02-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:56:13,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:56:13,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:56:18,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:56:18,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 21:56:23,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2843, 'learning_rate': 5e-06, 'epoch': 0.84} + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:25<08:37, 11.26s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:46<10:29, 13.98s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|████████████████████████████████████████████████████████████████���██▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:06<11:37, 15.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:26<12:12, 17.03s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1911, 'learning_rate': 5.0800000000000005e-06, 'epoch': 0.86} + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:45<12:27, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:05<12:31, 18.34s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3132, 'learning_rate': 5.12e-06, 'epoch': 0.86} + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1924, 'learning_rate': 5.140000000000001e-06, 'epoch': 0.87} + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:24<12:25, 18.65s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2292, 'learning_rate': 5.1600000000000006e-06, 'epoch': 0.87} + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:02<11:58, 18.90s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1601, 'learning_rate': 5.18e-06, 'epoch': 0.87} + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2462, 'learning_rate': 5.2e-06, 'epoch': 0.88} + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:40<11:20, 18.91s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2751, 'learning_rate': 5.220000000000001e-06, 'epoch': 0.88} + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|██████████████████████████████████████████████████���██████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:59<11:00, 18.86s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:18<10:44, 18.96s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:37<10:22, 18.85s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2005, 'learning_rate': 5.28e-06, 'epoch': 0.89} + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|████████████████████████████████████████████████████████████████████���█▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:55<09:58, 18.69s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2848, 'learning_rate': 5.300000000000001e-06, 'epoch': 0.89} + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████��███████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:12:13<09:34, 18.54s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:31<09:10, 18.35s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:49<08:49, 18.25s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1393, 'learning_rate': 5.36e-06, 'epoch': 0.9} + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████��███████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:07<08:28, 18.15s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2643, 'learning_rate': 5.380000000000001e-06, 'epoch': 0.91} + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:13:25<08:07, 18.07s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1311, 'learning_rate': 5.400000000000001e-06, 'epoch': 0.91} + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1079, 'learning_rate': 5.420000000000001e-06, 'epoch': 0.91} + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██���█████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:00<07:25, 17.81s/it]g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1737, 'learning_rate': 5.4400000000000004e-06, 'epoch': 0.92} +[WARNING|modeling_utils.py:388] 2022-02-28 22:03:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:03:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:03:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:03:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:03:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:03:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:03:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 21:56:08,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:14:35<06:44, 17.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:53<06:27, 17.62s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:10<06:06, 17.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2165, 'learning_rate': 5.5200000000000005e-06, 'epoch': 0.93} + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:27<05:46, 17.32s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1548, 'learning_rate': 5.540000000000001e-06, 'epoch': 0.93} + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|███████████████████████████████████████████████████████���█████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:44<05:26, 17.16s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:00<05:05, 16.99s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|███████████████████████████████████████���██████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:17<04:46, 16.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:33<04:27, 16.69s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|████��██████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:49<04:06, 16.45s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:04<03:46, 16.17s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:04<03:46, 16.17s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:04<03:46, 16.17s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:13,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:13,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:13,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:13,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:13,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:17:19<03:25, 15.85s/it]g-point operations will not be computed-28 22:03:38,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|████████████████████████████████████████████████████████████████���██████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:34<03:06, 15.58s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|████████████████████████████████████████████████████████████████████████████ | 286/297 [1:17:49<02:48, 15.31s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:53,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:53,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:53,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:53,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:53,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:53,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:06:53,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:06:37,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▎ | 287/297 [1:18:03<02:29, 14.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▎ | 287/297 [1:18:03<02:29, 14.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▎ | 287/297 [1:18:03<02:29, 14.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▎ | 287/297 [1:18:03<02:29, 14.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▎ | 287/297 [1:18:03<02:29, 14.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▎ | 287/297 [1:18:03<02:29, 14.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:18,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:18,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2076, 'learning_rate': 5.74e-06, 'epoch': 0.97} +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:18,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:18,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:26,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:26,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:26,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:26,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▊ | 289/297 [1:18:30<01:52, 14.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▊ | 289/297 [1:18:30<01:52, 14.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▊ | 289/297 [1:18:30<01:52, 14.07s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:38,399 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:38,399 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:42,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:42,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2955, 'learning_rate': 5.78e-06, 'epoch': 0.98} +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:46,926 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:46,926 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:50,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:50,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:50,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:32,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▍ | 291/297 [1:18:52<01:15, 12.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▍ | 291/297 [1:18:52<01:15, 12.61s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:07:58,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:01,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:03,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:03,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3609, 'learning_rate': 5.82e-06, 'epoch': 0.98} +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:06,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:08,949 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:11,042 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:07:54,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|█████████████████████████████████████████████████████████████████████████████▉ | 293/297 [1:19:11<00:43, 10.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:13,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|█████████████████████████████████████████████��███████████████████████████████▉ | 293/297 [1:19:11<00:43, 10.85s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:13,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:15,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:13,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:16,994 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:13,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:18,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:13,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▏| 294/297 [1:19:19<00:29, 9.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:20,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▏| 294/297 [1:19:19<00:29, 9.89s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:20,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:22,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:20,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:25,641 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:20,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▍| 295/297 [1:19:25<00:17, 8.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:27,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▍| 295/297 [1:19:25<00:17, 8.93s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:27,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:28,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:27,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:30,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:27,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|██████████████████████████████████████████████████████████████████████████████▋| 296/297 [1:19:31<00:08, 8.01s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:33,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|██████████████████████████████████████████████████████████████████████████████▋| 296/297 [1:19:31<00:08, 8.01s/it][WARNING|modeling_utils.py:388] 2022-02-28 22:08:33,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-02-28 22:08:34,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-28 22:08:33,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2114] 2022-02-28 22:08:37,283 >> Saving model checkpoint to ./=)███| 297/297 [1:19:36<00:00, 7.08s/it][INFO|trainer.py:1492] 2022-02-28 22:08:37,281 >> 3,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2114] 2022-02-28 22:08:37,283 >> Saving model checkpoint to ./=)███| 297/297 [1:19:36<00:00, 7.08s/it][INFO|trainer.py:1492] 2022-02-28 22:08:37,281 >> 3,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5598, 'learning_rate': 5.92e-06, 'epoch': 1.0} +[INFO|trainer.py:2114] 2022-02-28 22:08:53,163 >> Saving model checkpoint to ./ ./pytorch_model.bin:36<00:00, 7.08s/it][INFO|trainer.py:1492] 2022-02-28 22:08:37,281 >> 3,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|modeling_utils.py:1081] 2022-02-28 22:09:09,451 >> Model weights saved in ./pytorch_model.bin:36<00:00, 7.08s/it][INFO|trainer.py:1492] 2022-02-28 22:08:37,281 >> 3,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|modeling_utils.py:1081] 2022-02-28 22:09:09,451 >> Model weights saved in ./pytorch_model.bin:36<00:00, 7.08s/it][INFO|trainer.py:1492] 2022-02-28 22:08:37,281 >> 3,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Adding files tracked by Git LFS: ['wandb/run-20220228_204859-8xn2plkx/run-8xn2plkx.wandb']. This may take a bit of time if the files are large.2022-02-28 22:08:37,281 >> 3,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed