diff --git "a/wandb/run-20220302_200036-31e4k99c/files/output.log" "b/wandb/run-20220302_200036-31e4k99c/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220302_200036-31e4k99c/files/output.log" @@ -0,0 +1,2443 @@ + + + 0%| | 0/297 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:00:44,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:00:46,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:00:49,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:00:52,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:00:54,858 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:00:57,483 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7698, 'learning_rate': 6e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:00,046 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 1/297 [00:22<1:50:22, 22.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:01:03,019 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:05,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:08,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:10,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:13,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:15,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:18,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8469, 'learning_rate': 1.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:21,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 2/297 [00:43<1:45:36, 21.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:01:23,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:26,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:28,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:31,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:33,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:36,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:38,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:41,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 3/297 [01:03<1:42:50, 20.99s/it] + + 1%|▊ | 3/297 [01:03<1:42:50, 20.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:01:44,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:46,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:49,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:51,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:54,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:56,844 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:01:59,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:01,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 4/297 [01:24<1:41:17, 20.74s/it] + + 1%|█ | 4/297 [01:24<1:41:17, 20.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:02:04,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:07,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:09,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:12,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:14,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:17,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:19,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:22,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 5/297 [01:44<1:40:11, 20.59s/it] + + 2%|█▎ | 5/297 [01:44<1:40:11, 20.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:02:24,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:27,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:29,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:32,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:34,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:37,278 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:39,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8118, 'learning_rate': 2.9999999999999997e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:42,224 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▋ | 6/297 [02:04<1:38:53, 20.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:02:44,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:47,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:49,744 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:52,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:54,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:57,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:02:59,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:02,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 7/297 [02:24<1:37:42, 20.22s/it] + + 2%|█▉ | 7/297 [02:24<1:37:42, 20.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:03:04,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:07,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:09,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:12,080 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:14,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:17,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:19,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:21,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 8/297 [02:44<1:36:57, 20.13s/it] + + 3%|██▏ | 8/297 [02:44<1:36:57, 20.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:03:24,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:27,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:29,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:31,910 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:34,422 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:36,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:39,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7309, 'learning_rate': 4.8e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:41,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▍ | 9/297 [03:03<1:36:09, 20.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:03:44,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:46,844 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:49,281 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:51,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:54,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:56,580 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:03:58,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:01,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 10/297 [03:23<1:35:12, 19.91s/it] + + 3%|██▋ | 10/297 [03:23<1:35:12, 19.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:04:04,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:06,397 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:08,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:11,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:13,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:16,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:18,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6528, 'learning_rate': 5.999999999999999e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:20,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|██▉ | 11/297 [03:43<1:34:15, 19.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:04:23,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:25,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:28,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:30,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:33,023 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:35,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:37,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.662, 'learning_rate': 6.599999999999999e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:40,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███▏ | 12/297 [04:02<1:33:12, 19.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:04:42,704 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:45,049 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:47,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:50,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:52,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:55,140 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:57,558 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:04:59,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 13/297 [04:22<1:33:10, 19.68s/it] + 4%|███▌ | 13/297 [04:22<1:33:10, 19.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 13/297 [04:22<1:33:10, 19.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:07,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:07,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:12,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:17,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:41<1:32:30, 19.61s/it]g-point operations will not be computed-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:41<1:32:30, 19.61s/it]g-point operations will not be computed-02 20:05:02,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:41<1:32:30, 19.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:41<1:32:30, 19.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:26,704 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:26,704 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:31,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:31,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:36,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [05:00<1:31:21, 19.44s/it]g-point operations will not be computed-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [05:00<1:31:21, 19.44s/it]g-point operations will not be computed-02 20:05:21,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [05:00<1:31:21, 19.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [05:00<1:31:21, 19.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:45,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:45,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:50,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:50,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:54,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:05:54,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:19<1:30:13, 19.27s/it]g-point operations will not be computed-02 20:05:40,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:19<1:30:13, 19.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:19<1:30:13, 19.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:04,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:04,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:09,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:09,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:13,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:38<1:29:11, 19.11s/it]g-point operations will not be computed-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:38<1:29:11, 19.11s/it]g-point operations will not be computed-02 20:05:59,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:38<1:29:11, 19.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:38<1:29:11, 19.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:23,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:27,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:27,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:32,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:56<1:28:03, 18.94s/it]g-point operations will not be computed-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:56<1:28:03, 18.94s/it]g-point operations will not be computed-02 20:06:18,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:56<1:28:03, 18.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:56<1:28:03, 18.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:41,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:41,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:46,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:46,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:06:50,775 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:15<1:27:00, 18.78s/it]g-point operations will not be computed-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:15<1:27:00, 18.78s/it]g-point operations will not be computed-02 20:06:36,974 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:15<1:27:00, 18.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:15<1:27:00, 18.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:00,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:00,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:04,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:04,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:09,225 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:33<1:26:14, 18.68s/it]g-point operations will not be computed-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:33<1:26:14, 18.68s/it]g-point operations will not be computed-02 20:06:55,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:33<1:26:14, 18.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:33<1:26:14, 18.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:18,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:18,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:22,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:22,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:27,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:27,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:51<1:25:19, 18.55s/it]g-point operations will not be computed-02 20:07:13,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:51<1:25:19, 18.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:51<1:25:19, 18.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:36,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:36,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:41,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:41,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:09<1:24:18, 18.40s/it]g-point operations will not be computed-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:09<1:24:18, 18.40s/it]g-point operations will not be computed-02 20:07:32,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:09<1:24:18, 18.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:09<1:24:18, 18.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:54,635 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:54,635 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:59,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:07:59,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:03,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:27<1:23:30, 18.29s/it]g-point operations will not be computed-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:27<1:23:30, 18.29s/it]g-point operations will not be computed-02 20:07:50,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:27<1:23:30, 18.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:27<1:23:30, 18.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:12,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:12,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:16,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:16,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:21,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:45<1:22:31, 18.14s/it]g-point operations will not be computed-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:45<1:22:31, 18.14s/it]g-point operations will not be computed-02 20:08:08,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:45<1:22:31, 18.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:45<1:22:31, 18.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:30,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:30,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:34,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:34,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:39,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [08:03<1:22:15, 18.14s/it]g-point operations will not be computed-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [08:03<1:22:15, 18.14s/it]g-point operations will not be computed-02 20:08:25,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [08:03<1:22:15, 18.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [08:03<1:22:15, 18.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:48,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:48,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:52,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:52,759 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:08:57,099 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it]g-point operations will not be computed-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it]g-point operations will not be computed-02 20:08:44,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:21<1:21:08, 17.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3171, 'learning_rate': 1.5599999999999996e-05, 'epoch': 0.09} + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:38<1:20:05, 17.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2827, 'learning_rate': 1.6199999999999997e-05, 'epoch': 0.09} + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 29/297 [09:13<1:18:06, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 29/297 [09:13<1:18:06, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1956, 'learning_rate': 1.68e-05, 'epoch': 0.1} + 10%|███████▊ | 29/297 [09:13<1:18:06, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 29/297 [09:13<1:18:06, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:10:01,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:10:01,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:10:01,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3854, 'learning_rate': 1.74e-05, 'epoch': 0.1} + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:30<1:17:02, 17.31s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3831, 'learning_rate': 1.7999999999999997e-05, 'epoch': 0.1} + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:46<1:16:03, 17.15s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [10:03<1:15:11, 17.03s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:20<1:14:17, 16.89s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:36<1:13:02, 16.66s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:52<1:11:46, 16.44s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [11:07<1:10:30, 16.21s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [11:07<1:10:30, 16.21s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [11:07<1:10:30, 16.21s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [11:07<1:10:30, 16.21s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [11:07<1:10:30, 16.21s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [11:07<1:10:30, 16.21s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:11:59,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3836, 'learning_rate': 2.1599999999999996e-05, 'epoch': 0.12} + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:23<1:09:02, 15.93s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:38<1:08:01, 15.76s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:38<1:08:01, 15.76s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:38<1:08:01, 15.76s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:38<1:08:01, 15.76s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:25,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:25,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:25,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:25,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:53<1:06:06, 15.37s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:53<1:06:06, 15.37s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:53<1:06:06, 15.37s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:38,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:38,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:38,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:38,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:38,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 40/297 [12:06<1:03:51, 14.91s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:48,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:48,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:48,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:48,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:12:56,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 41/297 [12:19<1:01:18, 14.37s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 41/297 [12:19<1:01:18, 14.37s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3261, 'learning_rate': 2.3999999999999997e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:02,641 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:02,641 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:02,641 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:08,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 42/297 [12:32<58:21, 13.73s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 42/297 [12:32<58:21, 13.73s/it]g-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3655, 'learning_rate': 2.4599999999999998e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:14,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:14,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:18,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:18,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:18,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:09:01,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▊ | 43/297 [12:43<54:56, 12.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:13:22,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▊ | 43/297 [12:43<54:56, 12.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:13:22,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:26,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:22,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:26,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:22,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:30,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:22,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:30,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:22,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 44/297 [12:53<51:23, 12.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 44/297 [12:53<51:23, 12.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:36,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:38,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:41,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:41,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4049, 'learning_rate': 2.6399999999999995e-05, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:44,515 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:46,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:48,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:48,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:32,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▋ | 46/297 [13:11<43:55, 10.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:13:50,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:52,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:50,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:54,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:50,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 47/297 [13:19<40:02, 9.61s/it]g-point operations will not be computed-02 20:13:50,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 47/297 [13:19<40:02, 9.61s/it]g-point operations will not be computed-02 20:13:50,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:13:59,848 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:58,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:01,523 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:13:58,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▎ | 48/297 [13:26<36:18, 8.75s/it]g-point operations will not be computed-02 20:13:58,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▎ | 48/297 [13:26<36:18, 8.75s/it]g-point operations will not be computed-02 20:13:58,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▎ | 48/297 [13:26<36:18, 8.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:14:04,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:06,242 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:04,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:09,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:04,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:09,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:04,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▌ | 49/297 [13:31<32:37, 7.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:14:10,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:12,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:10,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 50/297 [13:37<29:22, 7.14s/it]g-point operations will not be computed-02 20:14:10,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 50/297 [13:37<29:22, 7.14s/it]g-point operations will not be computed-02 20:14:10,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 50/297 [13:37<29:22, 7.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▊ | 50/297 [13:37<29:22, 7.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:23,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:23,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:28,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:28,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:14:34,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2871, 'learning_rate': 2.9999999999999997e-05, 'epoch': 0.17} + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1845, 'learning_rate': 3.06e-05, 'epoch': 0.17} + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:19<58:10, 14.25s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.163, 'learning_rate': 3.119999999999999e-05, 'epoch': 0.18} + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:40<1:05:32, 16.12s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1515, 'learning_rate': 3.1799999999999994e-05, 'epoch': 0.18} + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [15:00<1:10:19, 17.36s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1402, 'learning_rate': 3.2999999999999996e-05, 'epoch': 0.19} + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:20<1:13:17, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [16:00<1:16:01, 19.01s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2467, 'learning_rate': 3.42e-05, 'epoch': 0.2} + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2223, 'learning_rate': 3.48e-05, 'epoch': 0.2} + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:39<1:16:29, 19.28s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1693, 'learning_rate': 3.539999999999999e-05, 'epoch': 0.2} + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:58<1:16:22, 19.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2163, 'learning_rate': 3.5999999999999994e-05, 'epoch': 0.21} + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:17<1:15:48, 19.27s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1301, 'learning_rate': 3.6599999999999995e-05, 'epoch': 0.21} + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:36<1:15:19, 19.23s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 63/297 [17:56<1:15:36, 19.39s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [18:15<1:14:48, 19.26s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|██████████��██████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:34<1:13:50, 19.10s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:53<1:12:59, 18.96s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2199, 'learning_rate': 3.96e-05, 'epoch': 0.23} + 23%|██████████████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████��███████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [19:11<1:12:11, 18.83s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.246, 'learning_rate': 4.02e-05, 'epoch': 0.23} + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:30<1:11:30, 18.74s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:48<1:10:52, 18.65s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [20:06<1:10:08, 18.54s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1451, 'learning_rate': 4.2e-05, 'epoch': 0.24} + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:24<1:09:20, 18.41s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:42<1:08:39, 18.31s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:42<1:08:39, 18.31s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2652, 'learning_rate': 4.259999999999999e-05, 'epoch': 0.24} + 24%|███████████████████▍ | 72/297 [20:42<1:08:39, 18.31s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:42<1:08:39, 18.31s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 72/297 [20:42<1:08:39, 18.31s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:21:34,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:21:34,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1772, 'learning_rate': 4.319999999999999e-05, 'epoch': 0.25} + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [21:00<1:07:50, 18.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:18<1:07:06, 18.06s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:18<1:07:06, 18.06s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:18<1:07:06, 18.06s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:18<1:07:06, 18.06s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:22:07,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:22:07,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:22:07,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:22:07,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 75/297 [21:36<1:06:45, 18.04s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1542, 'learning_rate': 4.4999999999999996e-05, 'epoch': 0.26} + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:54<1:05:47, 17.86s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 77/297 [22:11<1:04:51, 17.69s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1771, 'learning_rate': 4.62e-05, 'epoch': 0.26} + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0612, 'learning_rate': 4.68e-05, 'epoch': 0.27} + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:45<1:03:00, 17.34s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [23:02<1:02:06, 17.17s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1813, 'learning_rate': 4.7999999999999994e-05, 'epoch': 0.27} + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:18<1:01:17, 17.03s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1929, 'learning_rate': 4.8599999999999995e-05, 'epoch': 0.28} + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:35<1:00:11, 16.80s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████��▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1566, 'learning_rate': 4.9199999999999997e-05, 'epoch': 0.28} + 28%|██████████████████████▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:51<59:07, 16.58s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2066, 'learning_rate': 4.98e-05, 'epoch': 0.28} + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [24:06<57:51, 16.30s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:22<56:47, 16.07s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:22<56:47, 16.07s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:22<56:47, 16.07s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:25:07,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:25:07,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:25:07,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:25:07,985 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1838, 'learning_rate': 5.1e-05, 'epoch': 0.29} + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:37<55:38, 15.82s/it]g-point operations will not be computed-02 20:14:18,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:52<54:31, 15.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:52<54:31, 15.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:52<54:31, 15.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:52<54:31, 15.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:52<54:31, 15.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:52<54:31, 15.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 87/297 [24:52<54:31, 15.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:07<53:42, 15.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:07<53:42, 15.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2594, 'learning_rate': 5.2199999999999995e-05, 'epoch': 0.3} + 30%|████████████████████████▎ | 88/297 [25:07<53:42, 15.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [25:07<53:42, 15.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:25:54,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:25:54,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:25:54,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|█████████████���██████████▌ | 89/297 [25:21<52:05, 15.03s/it]g-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:21<52:05, 15.03s/it]g-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2559, 'learning_rate': 5.279999999999999e-05, 'epoch': 0.3} +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:04,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:04,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:04,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:04,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:04,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:13,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:13,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:13,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:13,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:21,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:21,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:21,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:21,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:25:32,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████ | 91/297 [25:48<48:04, 14.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████ | 91/297 [25:48<48:04, 14.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████ | 91/297 [25:48<48:04, 14.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:33,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:33,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:37,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:37,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3723, 'learning_rate': 5.459999999999999e-05, 'epoch': 0.31} +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:37,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:43,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:46,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:46,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:46,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:27,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▋ | 93/297 [26:10<43:04, 12.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:26:50,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▋ | 93/297 [26:10<43:04, 12.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:26:50,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:53,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:50,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:56,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:50,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:56,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:50,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:26:56,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:26:50,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 94/297 [26:20<40:07, 11.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:02,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:02,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:05,812 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:08,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:08,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:10,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:12,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:14,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:16,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:16,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3846, 'learning_rate': 5.6999999999999996e-05, 'epoch': 0.32} +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:19,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:20,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:20,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 97/297 [26:45<31:14, 9.37s/it]g-point operations will not be computed-02 20:27:00,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:26,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:24,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:27,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:24,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 98/297 [26:52<28:07, 8.48s/it]g-point operations will not be computed-02 20:27:24,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 98/297 [26:52<28:07, 8.48s/it]g-point operations will not be computed-02 20:27:24,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:32,275 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:30,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:33,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:30,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:33,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:30,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▎ | 99/297 [26:57<25:00, 7.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:27:36,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:38,566 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:36,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:39,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:36,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:39,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:36,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 100/297 [27:03<22:37, 6.89s/it]g-point operations will not be computed-02 20:27:36,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 100/297 [27:03<22:37, 6.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 100/297 [27:03<22:37, 6.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:49,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:49,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:54,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:54,289 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:59,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:27:59,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|█████████████████████���█████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 101/297 [27:24<36:30, 11.18s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:44<45:29, 14.00s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [28:04<51:17, 15.86s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:25<55:06, 17.13s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2254, 'learning_rate': 6.239999999999999e-05, 'epoch': 0.35} + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:44<57:23, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1199, 'learning_rate': 6.299999999999999e-05, 'epoch': 0.36} + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [29:04<58:53, 18.50s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:24<59:37, 18.83s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:43<59:58, 19.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2144, 'learning_rate': 6.479999999999999e-05, 'epoch': 0.37} + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████��███████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|████████████████████████████▉ | 109/297 [30:03<1:00:07, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 110/297 [30:22<59:58, 19.25s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.256, 'learning_rate': 6.599999999999999e-05, 'epoch': 0.37} + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:41<59:35, 19.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1764, 'learning_rate': 6.659999999999999e-05, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1245, 'learning_rate': 6.72e-05, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [31:00<59:02, 19.15s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:39<58:31, 19.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 115/297 [31:58<57:50, 19.07s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [32:16<56:56, 18.87s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:35<56:20, 18.78s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:35<56:20, 18.78s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.09, 'learning_rate': 6.96e-05, 'epoch': 0.39} + 39%|███████████████████████████████▉ | 117/297 [32:35<56:20, 18.78s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:35<56:20, 18.78s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:35<56:20, 18.78s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:35<56:20, 18.78s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:29,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:53<55:38, 18.65s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:53<55:38, 18.65s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1146, 'learning_rate': 7.02e-05, 'epoch': 0.4} + 40%|████████████████████████████████▏ | 118/297 [32:53<55:38, 18.65s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:53<55:38, 18.65s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:53<55:38, 18.65s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:53<55:38, 18.65s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:53<55:38, 18.65s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1764, 'learning_rate': 7.079999999999999e-05, 'epoch': 0.4} +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:33:49,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.181, 'learning_rate': 7.139999999999999e-05, 'epoch': 0.4} + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:29<54:11, 18.37s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:47<53:34, 18.26s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2696, 'learning_rate': 7.259999999999999e-05, 'epoch': 0.41} + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [34:05<53:02, 18.19s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:23<52:26, 18.08s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2193, 'learning_rate': 7.379999999999999e-05, 'epoch': 0.42} + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:41<51:43, 17.94s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2735, 'learning_rate': 7.439999999999999e-05, 'epoch': 0.42} + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:59<51:28, 17.96s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1796, 'learning_rate': 7.5e-05, 'epoch': 0.42} + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [35:16<50:42, 17.79s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:33<50:02, 17.66s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:33<50:02, 17.66s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2039, 'learning_rate': 7.56e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▋ | 127/297 [35:33<50:02, 17.66s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:33<50:02, 17.66s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:33<50:02, 17.66s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:33<50:02, 17.66s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:36:26,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:36:26,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:51<49:18, 17.51s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2619, 'learning_rate': 7.68e-05, 'epoch': 0.43} + 43%|███████████████████████████████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████���████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [36:08<48:39, 17.38s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3677, 'learning_rate': 7.74e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:25<47:56, 17.23s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2429, 'learning_rate': 7.8e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 131/297 [36:41<47:10, 17.05s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:58<46:20, 16.85s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [37:14<45:41, 16.72s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:30<44:42, 16.45s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:45<43:47, 16.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:45<43:47, 16.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:45<43:47, 16.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:45<43:47, 16.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:45<43:47, 16.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:45<43:47, 16.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 135/297 [37:45<43:47, 16.22s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2316, 'learning_rate': 8.1e-05, 'epoch': 0.46} +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:39,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [38:16<41:36, 15.60s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:57,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:38:57,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:03,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:03,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:03,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:03,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 138/297 [38:31<40:49, 15.41s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 138/297 [38:31<40:49, 15.41s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 138/297 [38:31<40:49, 15.41s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 138/297 [38:31<40:49, 15.41s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:17,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:17,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:17,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:17,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 139/297 [38:45<39:35, 15.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 139/297 [38:45<39:35, 15.04s/it]g-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:28,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:28,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:28,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:28,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:36,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:36,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1356, 'learning_rate': 8.34e-05, 'epoch': 0.47} +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:36,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:36,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:44,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:44,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:44,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:44,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:27:43,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 141/297 [39:11<36:23, 14.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 141/297 [39:11<36:23, 14.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 141/297 [39:11<36:23, 14.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:56,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:39:56,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:01,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:01,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.156, 'learning_rate': 8.459999999999998e-05, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:01,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:06,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:06,919 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:11,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████ | 143/297 [39:34<32:43, 12.75s/it]g-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████ | 143/297 [39:34<32:43, 12.75s/it]g-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:15,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:15,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:19,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:19,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:22,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:22,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:25,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:25,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:29,072 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:31,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:39:50,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 145/297 [39:54<28:38, 11.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 145/297 [39:54<28:38, 11.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:35,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:37,913 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:39,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:33,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 146/297 [40:03<26:18, 10.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:42,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 146/297 [40:03<26:18, 10.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:42,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:43,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:42,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:45,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:42,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:47,572 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:42,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████��████████████████ | 147/297 [40:10<23:56, 9.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:49,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|████████████████████████████████████████ | 147/297 [40:10<23:56, 9.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:49,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:51,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:49,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:54,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:49,418 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 148/297 [40:17<21:32, 8.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:55,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 148/297 [40:17<21:32, 8.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:40:55,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:57,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:55,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:40:59,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:40:55,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 149/297 [40:22<19:04, 7.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:41:01,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 149/297 [40:22<19:04, 7.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:41:01,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:41:03,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:01,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 150/297 [40:27<17:05, 6.98s/it]g-point operations will not be computed-02 20:41:01,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 150/297 [40:27<17:05, 6.98s/it]g-point operations will not be computed-02 20:41:01,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 150/297 [40:27<17:05, 6.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 150/297 [40:27<17:05, 6.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:41:13,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:41:13,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:41:19,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:41:19,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:41:24,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2932, 'learning_rate': 8.999999999999999e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:48<27:10, 11.17s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2495, 'learning_rate': 9.059999999999999e-05, 'epoch': 0.51} + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2386, 'learning_rate': 9.12e-05, 'epoch': 0.51} + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:29<38:02, 15.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████���███████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:49<40:43, 17.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████���███████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 155/297 [42:09<42:25, 17.92s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2151, 'learning_rate': 9.3e-05, 'epoch': 0.52} + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:29<43:24, 18.47s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1392, 'learning_rate': 9.36e-05, 'epoch': 0.53} + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:48<43:52, 18.80s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.165, 'learning_rate': 9.419999999999999e-05, 'epoch': 0.53} + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [43:08<44:15, 19.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1931, 'learning_rate': 9.539999999999999e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [43:28<44:12, 19.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [44:06<43:28, 19.18s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1198, 'learning_rate': 9.659999999999999e-05, 'epoch': 0.54} + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [44:25<43:03, 19.14s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2464, 'learning_rate': 9.719999999999999e-05, 'epoch': 0.55} + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:44<42:54, 19.21s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1406, 'learning_rate': 9.779999999999999e-05, 'epoch': 0.55} + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [45:03<42:18, 19.09s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|██████████��██████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [45:22<41:42, 18.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1875, 'learning_rate': 9.9e-05, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 166/297 [45:40<41:05, 18.82s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.152, 'learning_rate': 9.96e-05, 'epoch': 0.56} +[WARNING|modeling_utils.py:388] 2022-03-02 20:46:41,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:46:41,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:46:41,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:46:41,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:46:41,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:46:41,761 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:17<39:56, 18.58s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [46:17<39:56, 18.58s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1881, 'learning_rate': 0.0001002, 'epoch': 0.56} + 57%|█████████████████████████████████████████████▊ | 168/297 [46:17<39:56, 18.58s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:04,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:04,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:04,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:04,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:35<39:26, 18.49s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:35<39:26, 18.49s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2114, 'learning_rate': 0.0001008, 'epoch': 0.57} + 57%|██████████████████████████████████████████████ | 169/297 [46:35<39:26, 18.49s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:35<39:26, 18.49s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:35<39:26, 18.49s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:35<39:26, 18.49s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1061, 'learning_rate': 0.0001014, 'epoch': 0.57} +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1188, 'learning_rate': 0.000102, 'epoch': 0.58} +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:47:29,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [47:29<37:36, 18.05s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:47<37:06, 17.96s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1139, 'learning_rate': 0.00010379999999999999, 'epoch': 0.59} + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [48:04<36:34, 17.84s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3033, 'learning_rate': 0.00010439999999999999, 'epoch': 0.59} + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [48:22<36:17, 17.85s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1447, 'learning_rate': 0.00010499999999999999, 'epoch': 0.59} + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:39<35:40, 17.69s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2387, 'learning_rate': 0.00010559999999999998, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:57<35:04, 17.53s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [49:14<34:28, 17.38s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [49:14<34:28, 17.38s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [49:14<34:28, 17.38s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [49:14<34:28, 17.38s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:02,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1095, 'learning_rate': 0.00010679999999999998, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 179/297 [49:31<33:51, 17.22s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:47<33:16, 17.07s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:47<33:16, 17.07s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3137, 'learning_rate': 0.00010739999999999998, 'epoch': 0.61} +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:31,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:31,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:31,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:31,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:50:31,802 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1593, 'learning_rate': 0.00010799999999999998, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [50:04<32:38, 16.88s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0903, 'learning_rate': 0.00010859999999999998, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [50:20<32:04, 16.74s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:36<31:26, 16.55s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:52<30:46, 16.34s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:08<30:04, 16.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:08<30:04, 16.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:08<30:04, 16.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:08<30:04, 16.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:08<30:04, 16.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:08<30:04, 16.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [51:08<30:04, 16.11s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1536, 'learning_rate': 0.00011099999999999999, 'epoch': 0.63} +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:01,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:38<28:40, 15.64s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:38<28:40, 15.64s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:38<28:40, 15.64s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:38<28:40, 15.64s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:38<28:40, 15.64s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:38<28:40, 15.64s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:29,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:53<28:11, 15.52s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:53<28:11, 15.52s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1625, 'learning_rate': 0.00011219999999999999, 'epoch': 0.63} + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:53<28:11, 15.52s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:53<28:11, 15.52s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:53<28:11, 15.52s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:53<28:11, 15.52s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:44,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 189/297 [52:08<27:16, 15.15s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 189/297 [52:08<27:16, 15.15s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1326, 'learning_rate': 0.00011279999999999999, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▌ | 189/297 [52:08<27:16, 15.15s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 189/297 [52:08<27:16, 15.15s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:54,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:54,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:54,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:52:54,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▊ | 190/297 [52:21<26:04, 14.62s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:02,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:02,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:02,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:08,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:08,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:08,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 191/297 [52:33<24:43, 13.99s/it]g-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:14,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:14,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:14,951 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:20,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:20,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:20,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:41:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 192/297 [52:45<23:21, 13.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 192/297 [52:45<23:21, 13.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:29,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:29,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:33,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████��███████████████████▋ | 193/297 [52:56<21:50, 12.60s/it]g-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 193/297 [52:56<21:50, 12.60s/it]g-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:37,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:39,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:39,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:43,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:25,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 194/297 [53:06<20:10, 11.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 194/297 [53:06<20:10, 11.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:47,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:47,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:51,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:53,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:53,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2756, 'learning_rate': 0.0001164, 'epoch': 0.66} +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:57,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:53:59,440 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:01,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:01,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:53:45,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 196/297 [53:24<17:23, 10.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:54:03,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:05,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:03,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:06,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:03,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 197/297 [53:31<15:37, 9.38s/it]g-point operations will not be computed-02 20:54:03,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 197/297 [53:31<15:37, 9.38s/it]g-point operations will not be computed-02 20:54:03,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 197/297 [53:31<15:37, 9.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:54:10,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:13,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:10,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 198/297 [53:37<13:58, 8.47s/it]g-point operations will not be computed-02 20:54:10,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 198/297 [53:37<13:58, 8.47s/it]g-point operations will not be computed-02 20:54:10,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 198/297 [53:37<13:58, 8.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:54:16,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:19,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:16,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 199/297 [53:43<12:21, 7.57s/it]g-point operations will not be computed-02 20:54:16,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 199/297 [53:43<12:21, 7.57s/it]g-point operations will not be computed-02 20:54:16,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:23,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:21,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:25,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:21,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:48<11:06, 6.87s/it]g-point operations will not be computed-02 20:54:21,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:48<11:06, 6.87s/it]g-point operations will not be computed-02 20:54:21,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:48<11:06, 6.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:48<11:06, 6.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:34,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:34,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:40,094 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:40,094 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:45,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:54:45,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [54:09<17:54, 11.19s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1895, 'learning_rate': 0.00012119999999999999, 'epoch': 0.68} + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 202/297 [54:30<22:01, 13.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.133, 'learning_rate': 0.00012179999999999999, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████��██████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [55:10<26:25, 17.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1853, 'learning_rate': 0.0001224, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [55:30<27:26, 17.90s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:49<27:59, 18.46s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [56:09<28:10, 18.79s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [56:09<28:10, 18.79s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [56:09<28:10, 18.79s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:56:57,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:56:57,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:56:57,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 20:56:57,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.19, 'learning_rate': 0.00012419999999999998, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [56:29<28:12, 19.01s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|██████��██████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:48<27:59, 19.09s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2564, 'learning_rate': 0.00012539999999999999, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [57:07<27:45, 19.15s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0469, 'learning_rate': 0.00012599999999999997, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [57:26<27:20, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 212/297 [57:45<26:58, 19.05s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|████████████████████████████████████████████████████��█████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [58:05<26:49, 19.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [58:23<26:23, 19.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|████████████████████████████████���█████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:42<25:55, 18.97s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.182, 'learning_rate': 0.000129, 'epoch': 0.73} + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 216/297 [59:00<25:20, 18.78s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1601, 'learning_rate': 0.00012959999999999998, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [59:19<24:51, 18.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0804, 'learning_rate': 0.0001302, 'epoch': 0.73} + 73%|█████████████████████████████████████████████████���█████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [59:37<24:24, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0997, 'learning_rate': 0.00013079999999999998, 'epoch': 0.74} + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:55<23:55, 18.40s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1931, 'learning_rate': 0.0001314, 'epoch': 0.74} + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▌ | 220/297 [1:00:13<23:30, 18.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|██████████████████████████████████████████████████████████▊ | 221/297 [1:00:31<23:07, 18.26s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0679, 'learning_rate': 0.0001326, 'epoch': 0.75} + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████��██████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:49<22:42, 18.16s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▎ | 223/297 [1:01:07<22:16, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1506, 'learning_rate': 0.0001338, 'epoch': 0.75} +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:07,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:07,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:07,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:07,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:07,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:07,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1266, 'learning_rate': 0.0001344, 'epoch': 0.76} + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▊ | 225/297 [1:01:43<21:39, 18.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:02:01<21:08, 17.87s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:02:01<21:08, 17.87s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:02:01<21:08, 17.87s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:02:01<21:08, 17.87s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:02:01<21:08, 17.87s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:51,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:51,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:02:51,794 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:02:18<20:35, 17.65s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.063, 'learning_rate': 0.0001362, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:02:35<20:07, 17.50s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1355, 'learning_rate': 0.0001368, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:52<19:40, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:03:09<19:13, 17.22s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:03:26<18:47, 17.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1313, 'learning_rate': 0.0001386, 'epoch': 0.78} + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:03:42<18:20, 16.93s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2142, 'learning_rate': 0.0001392, 'epoch': 0.78} + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:58<17:52, 16.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:04:15<17:22, 16.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████���███████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:04:30<16:50, 16.31s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:46<16:21, 16.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:46<16:21, 16.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:46<16:21, 16.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:46<16:21, 16.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:46<16:21, 16.08s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:05:35,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:05:35,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.177, 'learning_rate': 0.00014159999999999997, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:05:01<15:51, 15.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:17<15:27, 15.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:05:17<15:27, 15.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:00,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:04,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:04,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:04,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:04,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:05:31<14:48, 15.32s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:05:31<14:48, 15.32s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:05:31<14:48, 15.32s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:16,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:16,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:16,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:16,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:16,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 240/297 [1:05:45<14:06, 14.86s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:26,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:26,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:26,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:33,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:33,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 241/297 [1:05:58<13:24, 14.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 241/297 [1:05:58<13:24, 14.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3344, 'learning_rate': 0.00014399999999999998, 'epoch': 0.81} +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:41,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:41,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:41,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:47,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 242/297 [1:06:10<12:38, 13.79s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 242/297 [1:06:10<12:38, 13.79s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3554, 'learning_rate': 0.0001446, 'epoch': 0.81} +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:53,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:53,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:53,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:06:59,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 243/297 [1:06:22<11:49, 13.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 243/297 [1:06:22<11:49, 13.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:03,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:03,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:07,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:07,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:10,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:10,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:13,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:13,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:17,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:19,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:19,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|█████████████████████████████████████████████████████████████████▏ | 245/297 [1:06:42<10:00, 11.56s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:23,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:25,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:27,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:29,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:29,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:31,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:33,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:35,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:37,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:37,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:38,975 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:40,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:43,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:43,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:45,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:48,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:49,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:49,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:52,020 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:53,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:54,808 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:07:54,808 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1969, 'learning_rate': 0.0001494, 'epoch': 0.84} +[WARNING|modeling_utils.py:388] 2022-03-02 21:08:00,673 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:08:00,673 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:08:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:08:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:08:11,231 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:08:11,231 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3411, 'learning_rate': 0.00015, 'epoch': 0.84} + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:07:38<08:46, 11.45s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|█████��█████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2298, 'learning_rate': 0.00015059999999999997, 'epoch': 0.85} + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:59<10:38, 14.20s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2068, 'learning_rate': 0.0001512, 'epoch': 0.85} + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|████��██████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:08:19<11:42, 15.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████���███████████████████████▌ | 254/297 [1:08:39<12:18, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1387, 'learning_rate': 0.0001524, 'epoch': 0.86} + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:59<12:34, 17.98s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|█████████████████████████████████████████████████████████████��██████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:09:18<12:39, 18.54s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3753, 'learning_rate': 0.0001536, 'epoch': 0.86} + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:09:38<12:33, 18.85s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1422, 'learning_rate': 0.00015419999999999998, 'epoch': 0.87} + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:57<12:22, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:10:17<12:07, 19.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:10:36<11:47, 19.13s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:10:36<11:47, 19.13s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1135, 'learning_rate': 0.00015539999999999998, 'epoch': 0.87} + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:10:36<11:47, 19.13s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:10:36<11:47, 19.13s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:11:26,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:11:26,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:11:26,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:11:26,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:55<11:26, 19.06s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████���███████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:11:14<11:06, 19.03s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1273, 'learning_rate': 0.0001572, 'epoch': 0.88} + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|███████████████��█████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:11:33<10:49, 19.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:52<10:27, 19.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1688, 'learning_rate': 0.0001584, 'epoch': 0.89} + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3492, 'learning_rate': 0.000159, 'epoch': 0.89} + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1599, 'learning_rate': 0.0001596, 'epoch': 0.9} + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████ | 267/297 [1:12:47<09:17, 18.58s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1327, 'learning_rate': 0.0001602, 'epoch': 0.9} + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:13:05<08:54, 18.44s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0783, 'learning_rate': 0.0001608, 'epoch': 0.9} + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████���██████▌ | 269/297 [1:13:23<08:33, 18.34s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2492, 'learning_rate': 0.0001614, 'epoch': 0.91} +[WARNING|modeling_utils.py:388] 2022-03-02 21:14:24,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:14:24,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:14:24,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:14:24,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:14:24,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:14:24,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:14:24,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:59<07:51, 18.14s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:14:17<07:30, 18.02s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▌ | 273/297 [1:14:35<07:09, 17.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▌ | 273/297 [1:14:35<07:09, 17.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1312, 'learning_rate': 0.0001632, 'epoch': 0.92} + 92%|████████████████████████████████████████████████████████████████████████▌ | 273/297 [1:14:35<07:09, 17.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▌ | 273/297 [1:14:35<07:09, 17.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▌ | 273/297 [1:14:35<07:09, 17.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▌ | 273/297 [1:14:35<07:09, 17.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▌ | 273/297 [1:14:35<07:09, 17.91s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1231, 'learning_rate': 0.0001638, 'epoch': 0.92} +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:15:30,188 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1969, 'learning_rate': 0.0001644, 'epoch': 0.92} + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:15:10<06:30, 17.73s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1583, 'learning_rate': 0.000165, 'epoch': 0.93} + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|███████████████████████���█████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:15:27<06:08, 17.55s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:15:44<05:47, 17.36s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0859, 'learning_rate': 0.0001662, 'epoch': 0.93} + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:16:00<05:26, 17.17s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1179, 'learning_rate': 0.0001668, 'epoch': 0.94} + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:16:17<05:05, 16.99s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1002, 'learning_rate': 0.0001674, 'epoch': 0.94} + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:16:33<04:45, 16.81s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:16:50<04:27, 16.70s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████��████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:17:06<04:07, 16.49s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████���███████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:17:21<03:47, 16.23s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:14,953 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:52<03:07, 15.63s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:17:52<03:07, 15.63s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:35,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:35,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:35,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:35,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:35,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:35,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|████████████████████████████████████████████████████████████████████████████ | 286/297 [1:18:06<02:47, 15.25s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:47,929 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:51,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:51,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:51,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:51,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:18:51,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▎ | 287/297 [1:18:20<02:28, 14.89s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:01,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:01,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:07,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:07,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:07,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:07,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▌ | 288/297 [1:18:34<02:12, 14.76s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:16,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:16,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:16,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:22,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:22,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:22,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▊ | 289/297 [1:18:47<01:52, 14.11s/it]g-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:28,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:28,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:28,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:34,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:34,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:34,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 20:54:29,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▏ | 290/297 [1:18:59<01:33, 13.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:19:38,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▏ | 290/297 [1:18:59<01:33, 13.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:19:38,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:42,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:38,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:42,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:38,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:46,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:38,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▍ | 291/297 [1:19:09<01:15, 12.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:19:49,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▍ | 291/297 [1:19:09<01:15, 12.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:19:49,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2094, 'learning_rate': 0.00017399999999999997, 'epoch': 0.98} +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:52,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:49,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:55,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:49,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:55,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:49,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:19:55,228 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:49,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▋ | 292/297 [1:19:19<00:58, 11.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:19:58,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:01,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:58,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:03,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:58,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:05,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:19:58,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|█████████████████████████████████████████████████████████████████████████████▉ | 293/297 [1:19:28<00:43, 10.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:20:07,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|█████████████████████████████████████████████████████████████████████████████▉ | 293/297 [1:19:28<00:43, 10.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:20:07,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:09,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:07,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:11,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:07,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:13,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:07,530 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▏| 294/297 [1:19:36<00:29, 9.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:20:15,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▏| 294/297 [1:19:36<00:29, 9.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 21:20:15,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:16,949 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:15,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:18,704 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:15,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▍| 295/297 [1:19:43<00:18, 9.04s/it]g-point operations will not be computed-02 21:20:15,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|██████████████████████████████████████████████████████████████████████████████▍| 295/297 [1:19:43<00:18, 9.04s/it]g-point operations will not be computed-02 21:20:15,209 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:23,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:22,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:24,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:22,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|██████████████████████████████████████████████████████████████████████████████▋| 296/297 [1:19:49<00:08, 8.11s/it]g-point operations will not be computed-02 21:20:22,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|██████████████████████████████████████████████████████████████████████████████▋| 296/297 [1:19:49<00:08, 8.11s/it]g-point operations will not be computed-02 21:20:22,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:29,090 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:27,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 21:20:31,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 21:20:27,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2114] 2022-03-02 21:20:32,143 >> Saving model checkpoint to ./=)███| 297/297 [1:19:54<00:00, 7.17s/it][INFO|trainer.py:1492] 2022-03-02 21:20:32,141 >> 7,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2114] 2022-03-02 21:20:32,143 >> Saving model checkpoint to ./=)███| 297/297 [1:19:54<00:00, 7.17s/it][INFO|trainer.py:1492] 2022-03-02 21:20:32,141 >> 7,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5291, 'learning_rate': 0.00017759999999999998, 'epoch': 1.0} +[INFO|trainer.py:2114] 2022-03-02 21:20:48,317 >> Saving model checkpoint to ./ ./pytorch_model.bin:54<00:00, 7.17s/it][INFO|trainer.py:1492] 2022-03-02 21:20:32,141 >> 7,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|modeling_utils.py:1081] 2022-03-02 21:21:04,531 >> Model weights saved in ./pytorch_model.bin:54<00:00, 7.17s/it][INFO|trainer.py:1492] 2022-03-02 21:20:32,141 >> 7,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Adding files tracked by Git LFS: ['wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb']. This may take a bit of time if the files are large.2022-03-02 21:20:32,141 >> 7,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +Adding files tracked by Git LFS: ['wandb/run-20220302_200036-31e4k99c/run-31e4k99c.wandb']. This may take a bit of time if the files are large.2022-03-02 21:20:32,141 >> 7,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed