diff --git "a/wandb/run-20220302_233655-33dtvgaa/files/output.log" "b/wandb/run-20220302_233655-33dtvgaa/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220302_233655-33dtvgaa/files/output.log" @@ -0,0 +1,1706 @@ + + + 0%| | 0/254 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:03,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:06,555 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:09,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:12,783 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:15,848 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:19,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7722, 'learning_rate': 6e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:22,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 1/254 [00:25<1:49:37, 26.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:37:25,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:28,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:31,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:34,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:37,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:40,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:43,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7898, 'learning_rate': 1.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:46,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 2/254 [00:50<1:44:31, 24.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:37:49,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:52,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:55,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:37:58,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:01,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:04,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:07,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:10,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9156, 'learning_rate': 1.8e-06, 'epoch': 0.01} + + 1%|▉ | 3/254 [01:13<1:41:48, 24.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:38:13,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:16,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:19,110 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:22,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:24,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:27,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:30,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8726, 'learning_rate': 2.4e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:33,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▎ | 4/254 [01:37<1:39:34, 23.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:38:36,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:39,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:42,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:45,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:48,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:50,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:53,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:38:56,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 5/254 [02:00<1:37:54, 23.59s/it] + + 2%|█▌ | 5/254 [02:00<1:37:54, 23.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:38:59,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:02,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:05,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:08,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:10,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:13,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:16,572 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:19,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 6/254 [02:22<1:36:30, 23.35s/it] + + 2%|█▉ | 6/254 [02:22<1:36:30, 23.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:39:22,403 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:25,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:28,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:30,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:33,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:36,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:39,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7939, 'learning_rate': 3.6e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:42,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 7/254 [02:45<1:35:25, 23.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:39:45,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:48,078 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:50,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:53,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:56,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:39:59,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:02,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8651, 'learning_rate': 4.2e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:04,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▌ | 8/254 [03:08<1:34:27, 23.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:40:07,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:10,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:13,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:16,300 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:19,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:22,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:24,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:27,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 9/254 [03:31<1:33:36, 22.93s/it] + + 4%|██▊ | 9/254 [03:31<1:33:36, 22.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:40:30,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:33,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:36,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:39,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:41,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:44,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:47,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:50,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 10/254 [03:53<1:32:43, 22.80s/it] + + 4%|███▏ | 10/254 [03:53<1:32:43, 22.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:40:53,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:55,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:40:58,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:01,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:04,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:06,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:09,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:12,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 11/254 [04:15<1:31:30, 22.60s/it] + + 4%|███▍ | 11/254 [04:15<1:31:30, 22.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:15,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:17,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:20,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:23,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:25,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:28,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:31,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:34,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6487, 'learning_rate': 6.599999999999999e-06, 'epoch': 0.05} + + 5%|███▊ | 12/254 [04:37<1:30:12, 22.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:36,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:39,680 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:42,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:45,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:48,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:51,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:53,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:41:56,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [05:00<1:30:00, 22.41s/it] + 5%|████ | 13/254 [05:00<1:30:00, 22.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [05:00<1:30:00, 22.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:04,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:04,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:10,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:10,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:15,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:15,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:21<1:28:47, 22.20s/it]g-point operations will not be computed-02 23:41:59,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:21<1:28:47, 22.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:21<1:28:47, 22.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:26,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:26,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:31,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:31,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:37,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it]g-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it]g-point operations will not be computed-02 23:42:21,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:43<1:27:30, 21.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:47,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:47,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:53,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:53,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:42:58,626 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it]g-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it]g-point operations will not be computed-02 23:42:42,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [06:04<1:26:35, 21.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:09,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:09,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:14,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:14,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:20,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it]g-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it]g-point operations will not be computed-02 23:43:04,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:26<1:25:39, 21.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:30,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:30,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:35,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:35,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:41,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:41,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:47<1:24:31, 21.49s/it]g-point operations will not be computed-02 23:43:25,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:47<1:24:31, 21.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:47<1:24:31, 21.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:51,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:51,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:57,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:43:57,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:02,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:02,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:08<1:23:41, 21.37s/it]g-point operations will not be computed-02 23:43:46,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:08<1:23:41, 21.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:08<1:23:41, 21.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:12,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:12,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:17,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:17,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:29<1:22:39, 21.20s/it]g-point operations will not be computed-02 23:44:07,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:29<1:22:39, 21.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:29<1:22:39, 21.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:33,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:33,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:38,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:38,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:43,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it]g-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it]g-point operations will not be computed-02 23:44:28,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:49<1:21:35, 21.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:53,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:53,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:59,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:44:59,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:04,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:04,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:04,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:10<1:20:35, 20.84s/it]g-point operations will not be computed-02 23:44:48,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:10<1:20:35, 20.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:10<1:20:35, 20.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:14,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:14,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:19,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:19,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:24,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:24,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:30<1:19:48, 20.73s/it]g-point operations will not be computed-02 23:45:09,241 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:30<1:19:48, 20.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:30<1:19:48, 20.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:34,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:34,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:39,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:39,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:44,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:44,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:51<1:19:06, 20.64s/it]g-point operations will not be computed-02 23:45:29,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:51<1:19:06, 20.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:51<1:19:06, 20.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:55,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:45:55,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:46:00,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:46:00,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:46:05,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3355, 'learning_rate': 1.44e-05, 'epoch': 0.1} + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:11<1:18:49, 20.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4344, 'learning_rate': 1.4999999999999999e-05, 'epoch': 0.1} + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:31<1:17:44, 20.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2896, 'learning_rate': 1.5599999999999996e-05, 'epoch': 0.11} + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|██��█████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3127, 'learning_rate': 1.6199999999999997e-05, 'epoch': 0.11} + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:51<1:16:36, 20.25s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2021, 'learning_rate': 1.68e-05, 'epoch': 0.11} + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:30<1:14:42, 19.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.386, 'learning_rate': 1.74e-05, 'epoch': 0.12} + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 30/254 [10:50<1:13:46, 19.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:09<1:12:52, 19.61s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:28<1:11:54, 19.43s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3241, 'learning_rate': 1.92e-05, 'epoch': 0.13} + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 33/254 [11:47<1:10:52, 19.24s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2998, 'learning_rate': 1.98e-05, 'epoch': 0.13} + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 34/254 [12:05<1:09:31, 18.96s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:23<1:08:15, 18.70s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.308, 'learning_rate': 2.1e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:49:37,730 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3661, 'learning_rate': 2.1599999999999996e-05, 'epoch': 0.15} + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:58<1:05:30, 18.11s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:06,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:16<1:04:31, 17.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:16<1:04:31, 17.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:16<1:04:31, 17.92s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:21,058 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3651, 'learning_rate': 2.28e-05, 'epoch': 0.15} + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:32<1:02:33, 17.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▌ | 40/254 [13:48<1:00:11, 16.88s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:50:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [14:02<57:32, 16.21s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [14:02<57:32, 16.21s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [14:02<57:32, 16.21s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:06,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 42/254 [14:16<54:38, 15.46s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:16,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:24,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:24,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 43/254 [14:29<51:31, 14.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 43/254 [14:29<51:31, 14.65s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:30,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:30,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:30,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:35,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:35,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████▏ | 44/254 [14:40<48:09, 13.76s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:40,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:40,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:44,028 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:46,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:46,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 45/254 [14:51<44:23, 12.75s/it]g-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:50,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:52,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:54,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:54,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:51:54,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:45:50,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▊ | 46/254 [15:00<40:40, 11.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:00,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:02,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:04,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:04,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:51:58,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 47/254 [15:09<36:59, 10.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:08,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:10,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 48/254 [15:16<33:14, 9.68s/it]g-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 48/254 [15:16<33:14, 9.68s/it]g-point operations will not be computed-02 23:52:06,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▍ | 48/254 [15:16<33:14, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:16,835 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:18,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:18,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:13,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▊ | 49/254 [15:22<29:38, 8.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:22,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it]g-point operations will not be computed-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it]g-point operations will not be computed-02 23:52:19,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 50/254 [15:28<26:22, 7.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:34,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:34,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:40,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:40,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 23:52:40,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.243, 'learning_rate': 2.9999999999999997e-05, 'epoch': 0.2} + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:52<43:30, 12.86s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.181, 'learning_rate': 3.06e-05, 'epoch': 0.2} + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:17<54:35, 16.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:40<1:01:42, 18.42s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [17:03<1:06:13, 19.87s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:27<1:09:15, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1373, 'learning_rate': 3.2999999999999996e-05, 'epoch': 0.22} + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:50<1:11:01, 21.52s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1799, 'learning_rate': 3.36e-05, 'epoch': 0.22} + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [18:12<1:11:51, 21.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1919, 'learning_rate': 3.42e-05, 'epoch': 0.23} + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:58<1:12:38, 22.35s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2191, 'learning_rate': 3.539999999999999e-05, 'epoch': 0.24} + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|████████████��█████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▉ | 60/254 [19:21<1:12:31, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2233, 'learning_rate': 3.5999999999999994e-05, 'epoch': 0.24} + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:43<1:12:09, 22.43s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1567, 'learning_rate': 3.6599999999999995e-05, 'epoch': 0.24} + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 62/254 [20:05<1:11:41, 22.41s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2383, 'learning_rate': 3.7199999999999996e-05, 'epoch': 0.25} + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:28<1:11:35, 22.49s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2146, 'learning_rate': 3.78e-05, 'epoch': 0.25} + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████��████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:50<1:10:51, 22.38s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1731, 'learning_rate': 3.84e-05, 'epoch': 0.26} + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [21:12<1:10:01, 22.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1804, 'learning_rate': 3.96e-05, 'epoch': 0.26} + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:34<1:09:04, 22.04s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [22:16<1:07:17, 21.71s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2335, 'learning_rate': 4.08e-05, 'epoch': 0.27} + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████���███████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:38<1:06:40, 21.63s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:59<1:05:48, 21.46s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1473, 'learning_rate': 4.2e-05, 'epoch': 0.28} + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [23:20<1:04:56, 21.29s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:41<1:04:10, 21.16s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [24:01<1:03:17, 20.98s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:22<1:02:40, 20.89s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2113, 'learning_rate': 4.4399999999999995e-05, 'epoch': 0.29} + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:43<1:02:17, 20.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2269, 'learning_rate': 4.4999999999999996e-05, 'epoch': 0.3} + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [25:03<1:01:27, 20.72s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1723, 'learning_rate': 4.56e-05, 'epoch': 0.3} + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1948, 'learning_rate': 4.62e-05, 'epoch': 0.31} + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|██████��█████████████████▎ | 77/254 [25:23<1:00:28, 20.50s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0934, 'learning_rate': 4.68e-05, 'epoch': 0.31} + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [26:02<58:32, 20.07s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2371, 'learning_rate': 4.7399999999999993e-05, 'epoch': 0.31} + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████���████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [26:22<57:31, 19.84s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1495, 'learning_rate': 4.7999999999999994e-05, 'epoch': 0.32} + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:41<56:39, 19.65s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1826, 'learning_rate': 4.8599999999999995e-05, 'epoch': 0.32} + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [27:00<55:34, 19.39s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [27:19<54:46, 19.22s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1952, 'learning_rate': 4.98e-05, 'epoch': 0.33} + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:37<53:36, 18.92s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:04:44,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:54<52:14, 18.55s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.149, 'learning_rate': 5.1e-05, 'epoch': 0.34} + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [28:12<51:03, 18.23s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████ | 87/254 [28:29<49:45, 17.88s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:46<48:48, 17.64s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [29:02<47:14, 17.18s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:12,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2526, 'learning_rate': 5.339999999999999e-05, 'epoch': 0.35} + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [29:17<45:24, 16.61s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:27,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2178, 'learning_rate': 5.399999999999999e-05, 'epoch': 0.36} + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:32<43:18, 15.94s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:37,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▋ | 92/254 [29:45<40:57, 15.17s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:45,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:45,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:45,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:51,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:51,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:51,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 93/254 [29:58<38:33, 14.37s/it]g-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:57,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:06:57,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:01,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:01,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:06,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:06,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2793, 'learning_rate': 5.5799999999999994e-05, 'epoch': 0.37} +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:10,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:10,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:13,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:13,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:13,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 23:52:27,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▋ | 95/254 [30:19<33:12, 12.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:20,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:20,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:23,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:25,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:25,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:27,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:29,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:31,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:33,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:33,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:35,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:37,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:39,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:39,149 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:40,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:43,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:45,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:45,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:48,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:49,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:50,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:50,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:52,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:52,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:58,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:07:58,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:08:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2692, 'learning_rate': 5.9999999999999995e-05, 'epoch': 0.4} + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [31:20<32:20, 12.68s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:44<40:36, 16.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [32:07<45:44, 18.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1882, 'learning_rate': 6.18e-05, 'epoch': 0.41} + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [32:30<49:06, 19.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2571, 'learning_rate': 6.239999999999999e-05, 'epoch': 0.41} + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:53<51:14, 20.63s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████���██▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [33:16<52:27, 21.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0725, 'learning_rate': 6.359999999999999e-05, 'epoch': 0.42} + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [34:01<53:31, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2084, 'learning_rate': 6.479999999999999e-05, 'epoch': 0.43} + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:46<53:08, 22.14s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [35:08<52:30, 22.03s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [35:30<52:06, 22.02s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0844, 'learning_rate': 6.72e-05, 'epoch': 0.44} + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.172, 'learning_rate': 6.78e-05, 'epoch': 0.45} + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [36:14<51:19, 22.00s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.188, 'learning_rate': 6.84e-05, 'epoch': 0.45} + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [36:35<50:42, 21.89s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:57<49:56, 21.72s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [37:18<49:19, 21.60s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1547, 'learning_rate': 7.02e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:39<48:33, 21.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [38:00<47:56, 21.31s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [38:21<47:17, 21.17s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1361, 'learning_rate': 7.199999999999999e-05, 'epoch': 0.47} + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:41<46:29, 20.97s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2158, 'learning_rate': 7.259999999999999e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [39:02<45:44, 20.79s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [39:22<45:03, 20.64s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:42<44:19, 20.46s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|████████████��██████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2207, 'learning_rate': 7.439999999999999e-05, 'epoch': 0.49} + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [40:03<44:04, 20.50s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|███████��████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2011, 'learning_rate': 7.56e-05, 'epoch': 0.5} + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [40:22<43:18, 20.30s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|��███████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.16, 'learning_rate': 7.62e-05, 'epoch': 0.5} + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [41:02<41:57, 19.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [41:21<41:18, 19.82s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2454, 'learning_rate': 7.74e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:41<40:38, 19.66s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:59<39:50, 19.43s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1064, 'learning_rate': 7.86e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [42:18<39:10, 19.27s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1765, 'learning_rate': 7.92e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [42:37<38:31, 19.10s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.073, 'learning_rate': 7.98e-05, 'epoch': 0.53} + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:55<37:42, 18.85s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2656, 'learning_rate': 8.04e-05, 'epoch': 0.53} + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [43:13<36:59, 18.65s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.214, 'learning_rate': 8.1e-05, 'epoch': 0.53} + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [43:31<36:02, 18.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1251, 'learning_rate': 8.16e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:48<35:03, 17.98s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0177, 'learning_rate': 8.22e-05, 'epoch': 0.54} + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [44:06<34:20, 17.77s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1159, 'learning_rate': 8.28e-05, 'epoch': 0.55} + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████��███████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [44:22<33:13, 17.33s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [44:37<31:47, 16.73s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [44:37<31:47, 16.73s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 140/254 [44:37<31:47, 16.73s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:41,502 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:52<30:16, 16.08s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:52<30:16, 16.08s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:52<30:16, 16.08s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:21:55,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 142/254 [45:05<28:36, 15.32s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:05,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:05,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:05,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:11,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:11,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:11,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 143/254 [45:18<26:53, 14.54s/it]g-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:18,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:18,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:18,005 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:23,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:23,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:23,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:07:17,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [45:30<25:02, 13.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [45:30<25:02, 13.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:31,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:34,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:34,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:34,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:27,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 145/254 [45:40<22:55, 12.62s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:40,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:42,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:42,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:42,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:45,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:47,994 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:50,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:51,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:53,883 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:53,883 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:55,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:57,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:59,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:22:59,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:02,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:04,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:05,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:05,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:08,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:10,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:12,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:12,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5587, 'learning_rate': 8.939999999999999e-05, 'epoch': 0.59} +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:19,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:19,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:25,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:25,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:31,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:23:31,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2288, 'learning_rate': 8.999999999999999e-05, 'epoch': 0.59} + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [46:40<21:51, 12.73s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.228, 'learning_rate': 9.059999999999999e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|███████████████████████████████████���████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [47:04<27:16, 16.05s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2054, 'learning_rate': 9.12e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [47:28<30:45, 18.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:51<32:51, 19.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████���███████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [48:14<34:05, 20.66s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [48:36<34:44, 21.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:59<35:02, 21.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [49:22<35:10, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1494, 'learning_rate': 9.479999999999999e-05, 'epoch': 0.62} + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1828, 'learning_rate': 9.539999999999999e-05, 'epoch': 0.63} + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 159/254 [49:44<35:01, 22.12s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|█████████████████████████████████��█████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [50:28<34:18, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [50:51<33:57, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2278, 'learning_rate': 9.719999999999999e-05, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████��███▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [51:13<33:49, 22.30s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [51:35<33:12, 22.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1873, 'learning_rate': 9.839999999999999e-05, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:57<32:37, 21.99s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1381, 'learning_rate': 9.9e-05, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 166/254 [52:18<32:03, 21.86s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [52:40<31:29, 21.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1664, 'learning_rate': 0.0001002, 'epoch': 0.66} + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [53:01<30:54, 21.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1571, 'learning_rate': 0.0001008, 'epoch': 0.66} + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [53:43<29:43, 21.23s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1581, 'learning_rate': 0.000102, 'epoch': 0.67} + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|████████████████████████████████████���█████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [54:03<29:10, 21.09s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0611, 'learning_rate': 0.0001026, 'epoch': 0.67} + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|████████████████████████████���█████████████████████████▊ | 172/254 [54:24<28:37, 20.95s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [54:44<28:05, 20.80s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████��██████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [55:05<27:34, 20.68s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████���██████████████████▊ | 175/254 [55:26<27:16, 20.72s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1462, 'learning_rate': 0.00010499999999999999, 'epoch': 0.69} + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [55:46<26:40, 20.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1756, 'learning_rate': 0.00010559999999999998, 'epoch': 0.69} + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [56:06<26:07, 20.35s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1378, 'learning_rate': 0.00010619999999999998, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [56:25<25:30, 20.14s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0316, 'learning_rate': 0.00010679999999999998, 'epoch': 0.7} + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [56:45<24:57, 19.96s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2297, 'learning_rate': 0.00010739999999999998, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|███████████████████████████��█████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 180/254 [57:04<24:22, 19.76s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2747, 'learning_rate': 0.00010799999999999998, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [57:24<24:06, 19.81s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [57:43<23:25, 19.52s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [58:02<22:47, 19.27s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 184/254 [58:20<22:08, 18.98s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2607, 'learning_rate': 0.00011039999999999999, 'epoch': 0.73} + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [58:38<21:33, 18.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1508, 'learning_rate': 0.00011099999999999999, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [58:56<20:57, 18.49s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1362, 'learning_rate': 0.00011159999999999999, 'epoch': 0.73} + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [59:14<20:32, 18.39s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [59:33<20:29, 18.63s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████���███████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [59:51<19:59, 18.45s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████���███████████████████████████████████ | 190/254 [1:00:08<19:07, 17.93s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▍ | 191/254 [1:00:24<18:13, 17.36s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:32,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:32,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2683, 'learning_rate': 0.0001146, 'epoch': 0.75} + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▋ | 192/254 [1:00:39<17:06, 16.56s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:46,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:46,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:46,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 193/254 [1:00:53<16:00, 15.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 193/254 [1:00:53<16:00, 15.74s/it]g-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:37:54,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:01,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:01,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3034, 'learning_rate': 0.0001158, 'epoch': 0.76} +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:06,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:06,394 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:10,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:10,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:10,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:22:37,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 195/254 [1:01:16<13:28, 13.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 195/254 [1:01:16<13:28, 13.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:18,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:18,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:22,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:22,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:14,716 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 196/254 [1:01:27<12:15, 12.68s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 196/254 [1:01:27<12:15, 12.68s/it][WARNING|modeling_utils.py:388] 2022-03-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:28,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:30,531 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:32,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:32,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:34,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:36,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:38,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:40,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:40,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:42,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:43,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:47,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:47,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:48,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:50,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:51,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:51,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 00:38:53,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 00:38:24,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file