diff --git "a/wandb/run-20220301_103527-1wkgn37c/files/output.log" "b/wandb/run-20220301_103527-1wkgn37c/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220301_103527-1wkgn37c/files/output.log" @@ -0,0 +1,2545 @@ + + + 0%| | 0/594 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:34,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:36,880 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.825, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:39,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 1/594 [00:11<1:49:36, 11.09s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:35:42,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:44,542 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:46,978 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9943, 'learning_rate': 6.000000000000001e-08, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:49,596 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 2/594 [00:21<1:45:00, 10.64s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:35:52,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:54,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:57,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8848, 'learning_rate': 1.2000000000000002e-07, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-01 10:35:59,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 3/594 [00:31<1:42:22, 10.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:36:02,419 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:04,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:07,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8356, 'learning_rate': 1.8e-07, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:09,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 4/594 [00:41<1:40:46, 10.25s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:36:12,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:14,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:17,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:19,762 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 5/594 [00:51<1:39:34, 10.14s/it] + + 1%|▋ | 5/594 [00:51<1:39:34, 10.14s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:36:22,313 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:24,801 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:27,230 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:29,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 6/594 [01:01<1:38:33, 10.06s/it] + + 1%|▊ | 6/594 [01:01<1:38:33, 10.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:36:32,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:34,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:37,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:39,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 7/594 [01:11<1:37:42, 9.99s/it] + + 1%|▉ | 7/594 [01:11<1:37:42, 9.99s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:36:42,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:44,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:46,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:49,355 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 8/594 [01:21<1:37:08, 9.95s/it] + + 1%|█ | 8/594 [01:21<1:37:08, 9.95s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:36:51,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:54,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:56,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:36:59,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▏ | 9/594 [01:30<1:36:18, 9.88s/it] + + 2%|█▏ | 9/594 [01:30<1:36:18, 9.88s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:37:01,611 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:04,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:06,464 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:08,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 10/594 [01:40<1:35:51, 9.85s/it] + + 2%|█▎ | 10/594 [01:40<1:35:51, 9.85s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:37:11,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:13,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:16,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8168, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:18,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▍ | 11/594 [01:50<1:34:56, 9.77s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:37:20,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:23,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:25,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:27,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 12/594 [01:59<1:34:00, 9.69s/it] + + 2%|█▌ | 12/594 [01:59<1:34:00, 9.69s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:37:30,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:32,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:35,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7954, 'learning_rate': 7.2e-07, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:37,477 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▊ | 13/594 [02:09<1:33:18, 9.64s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:37:39,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:42,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:44,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7457, 'learning_rate': 7.799999999999999e-07, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:46,984 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▉ | 14/594 [02:18<1:32:45, 9.60s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:37:49,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:51,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:54,094 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:37:56,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 15/594 [02:28<1:32:18, 9.57s/it] + + 3%|██ | 15/594 [02:28<1:32:18, 9.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:37:58,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:01,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:03,440 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8226, 'learning_rate': 9e-07, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:05,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 16/594 [02:37<1:31:03, 9.45s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:38:08,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:10,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:12,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:14,883 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 17/594 [02:46<1:30:13, 9.38s/it] + + 3%|██▎ | 17/594 [02:46<1:30:13, 9.38s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:38:17,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:19,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:21,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8984, 'learning_rate': 1.0200000000000002e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:23,999 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▍ | 18/594 [02:55<1:29:17, 9.30s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:38:26,385 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:28,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:30,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:33,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 19/594 [03:04<1:28:40, 9.25s/it] + + 3%|██▌ | 19/594 [03:04<1:28:40, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:38:35,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:37,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:39,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:42,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8867, 'learning_rate': 1.14e-06, 'epoch': 0.03} + 3%|██▋ | 20/594 [03:13<1:27:54, 9.19s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:38:44,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:46,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:48,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8371, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:51,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|██▊ | 21/594 [03:22<1:27:20, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:38:53,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:55,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:38:57,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7583, 'learning_rate': 1.26e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:00,133 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|██▉ | 22/594 [03:31<1:26:30, 9.07s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:39:02,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:04,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:06,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7754, 'learning_rate': 1.3199999999999999e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:09,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|███ | 23/594 [03:40<1:25:53, 9.03s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:39:11,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:13,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:15,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:17,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8551, 'learning_rate': 1.38e-06, 'epoch': 0.04} + 4%|███▏ | 24/594 [03:49<1:25:16, 8.98s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:39:20,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:22,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:24,501 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6186, 'learning_rate': 1.44e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:27,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 25/594 [03:59<1:26:03, 9.07s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:39:29,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:31,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:29,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:33,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:29,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 26/594 [04:07<1:24:45, 8.95s/it]g-point operations will not be computed-01 10:39:29,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 26/594 [04:07<1:24:45, 8.95s/it]g-point operations will not be computed-01 10:39:29,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 26/594 [04:07<1:24:45, 8.95s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:39:38,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:40,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:38,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:42,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:38,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 27/594 [04:16<1:23:35, 8.85s/it]g-point operations will not be computed-01 10:39:38,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 27/594 [04:16<1:23:35, 8.85s/it]g-point operations will not be computed-01 10:39:38,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 27/594 [04:16<1:23:35, 8.85s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:39:46,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:48,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:46,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:50,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:46,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 28/594 [04:24<1:22:46, 8.77s/it]g-point operations will not be computed-01 10:39:46,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 28/594 [04:24<1:22:46, 8.77s/it]g-point operations will not be computed-01 10:39:46,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 28/594 [04:24<1:22:46, 8.77s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:39:55,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:57,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:55,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:59,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:55,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:39:59,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:39:55,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 29/594 [04:33<1:21:47, 8.69s/it]g-point operations will not be computed-01 10:39:55,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 29/594 [04:33<1:21:47, 8.69s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:03,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:05,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:03,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:07,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:03,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:07,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:03,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 30/594 [04:41<1:20:56, 8.61s/it]g-point operations will not be computed-01 10:40:03,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 30/594 [04:41<1:20:56, 8.61s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:12,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:14,198 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:12,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:16,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:12,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:16,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:12,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 31/594 [04:50<1:19:51, 8.51s/it]g-point operations will not be computed-01 10:40:12,123 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 31/594 [04:50<1:19:51, 8.51s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:20,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:22,422 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:20,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:24,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:20,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:24,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:20,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 32/594 [04:58<1:18:39, 8.40s/it]g-point operations will not be computed-01 10:40:20,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 32/594 [04:58<1:18:39, 8.40s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:28,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:30,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:28,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:32,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:28,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:32,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:28,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 33/594 [05:06<1:17:49, 8.32s/it]g-point operations will not be computed-01 10:40:28,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 33/594 [05:06<1:17:49, 8.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:36,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:38,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:36,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:40,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:36,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:40,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:36,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 34/594 [05:14<1:16:12, 8.17s/it]g-point operations will not be computed-01 10:40:36,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 34/594 [05:14<1:16:12, 8.17s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:44,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:46,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:44,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:48,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:44,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:48,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:44,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 35/594 [05:21<1:14:47, 8.03s/it]g-point operations will not be computed-01 10:40:44,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 35/594 [05:21<1:14:47, 8.03s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:52,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:53,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:52,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:55,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:52,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:40:55,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:52,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 36/594 [05:29<1:13:35, 7.91s/it]g-point operations will not be computed-01 10:40:52,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 36/594 [05:29<1:13:35, 7.91s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:40:59,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:01,537 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:59,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:05,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:59,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:05,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:40:59,690 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5558, 'learning_rate': 2.1000000000000002e-06, 'epoch': 0.06} + 6%|████▉ | 37/594 [05:36<1:12:18, 7.79s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:07,120 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:08,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:10,698 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7056, 'learning_rate': 2.16e-06, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:12,428 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 38/594 [05:44<1:10:36, 7.62s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:14,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 38/594 [05:44<1:10:36, 7.62s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:14,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 38/594 [05:44<1:10:36, 7.62s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:14,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 39/594 [05:51<1:08:47, 7.44s/it]g-point operations will not be computed-01 10:41:14,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 39/594 [05:51<1:08:47, 7.44s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:21,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 39/594 [05:51<1:08:47, 7.44s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:21,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:24,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:21,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 40/594 [05:58<1:07:03, 7.26s/it]g-point operations will not be computed-01 10:41:21,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 40/594 [05:58<1:07:03, 7.26s/it]g-point operations will not be computed-01 10:41:21,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 40/594 [05:58<1:07:03, 7.26s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:28,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:31,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:28,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 41/594 [06:04<1:04:44, 7.02s/it]g-point operations will not be computed-01 10:41:28,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 41/594 [06:04<1:04:44, 7.02s/it]g-point operations will not be computed-01 10:41:28,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 41/594 [06:04<1:04:44, 7.02s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:34,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:37,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:34,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 42/594 [06:10<1:02:20, 6.78s/it]g-point operations will not be computed-01 10:41:34,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 42/594 [06:10<1:02:20, 6.78s/it]g-point operations will not be computed-01 10:41:34,430 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 42/594 [06:10<1:02:20, 6.78s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:40,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:43,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:40,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 43/594 [06:16<59:18, 6.46s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:46,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 43/594 [06:16<59:18, 6.46s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:46,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5138, 'learning_rate': 2.46e-06, 'epoch': 0.07} + 7%|██████ | 44/594 [06:21<55:34, 6.06s/it]g-point operations will not be computed-01 10:41:46,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|██████ | 44/594 [06:21<55:34, 6.06s/it]g-point operations will not be computed-01 10:41:46,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|██████ | 44/594 [06:21<55:34, 6.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:51,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:53,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:51,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:53,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:51,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 45/594 [06:26<51:25, 5.62s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:55,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:57,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:55,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:41:57,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:41:55,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 46/594 [06:30<47:13, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:59,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 46/594 [06:30<47:13, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:41:59,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 47/594 [06:33<43:02, 4.72s/it]g-point operations will not be computed-01 10:41:59,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 47/594 [06:33<43:02, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:03,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 47/594 [06:33<43:02, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:03,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 48/594 [06:37<38:50, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:06,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 48/594 [06:37<38:50, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:06,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▊ | 49/594 [06:40<34:58, 3.85s/it]g-point operations will not be computed-01 10:42:06,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:10,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:09,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:10,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:09,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▉ | 50/594 [06:42<32:19, 3.57s/it]g-point operations will not be computed-01 10:42:09,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▉ | 50/594 [06:42<32:19, 3.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:14,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|���█████▉ | 50/594 [06:42<32:19, 3.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:14,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:19,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:14,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:19,340 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:14,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 51/594 [06:53<51:43, 5.72s/it]g-point operations will not be computed-01 10:42:14,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 51/594 [06:53<51:43, 5.72s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:24,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 51/594 [06:53<51:43, 5.72s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:24,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:29,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:24,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:29,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:24,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 52/594 [07:03<1:03:52, 7.07s/it]g-point operations will not be computed-01 10:42:24,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 52/594 [07:03<1:03:52, 7.07s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:34,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 52/594 [07:03<1:03:52, 7.07s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:34,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:39,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:34,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:39,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:34,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 53/594 [07:14<1:12:26, 8.03s/it]g-point operations will not be computed-01 10:42:34,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 53/594 [07:14<1:12:26, 8.03s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:45,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 53/594 [07:14<1:12:26, 8.03s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:45,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:50,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:45,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:42:50,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:45,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 54/594 [07:24<1:18:05, 8.68s/it]g-point operations will not be computed-01 10:42:45,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 54/594 [07:24<1:18:05, 8.68s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:55,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 54/594 [07:24<1:18:05, 8.68s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:42:55,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:00,079 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:55,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:00,079 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:55,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:00,079 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:42:55,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 55/594 [07:34<1:21:11, 9.04s/it]g-point operations will not be computed-01 10:42:55,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 55/594 [07:34<1:21:11, 9.04s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 55/594 [07:34<1:21:11, 9.04s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:09,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:43:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 56/594 [07:44<1:23:26, 9.31s/it]g-point operations will not be computed-01 10:43:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 56/594 [07:44<1:23:26, 9.31s/it]g-point operations will not be computed-01 10:43:05,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 56/594 [07:44<1:23:26, 9.31s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:14,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 56/594 [07:44<1:23:26, 9.31s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:14,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:19,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:43:14,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 57/594 [07:53<1:24:38, 9.46s/it]g-point operations will not be computed-01 10:43:14,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 57/594 [07:53<1:24:38, 9.46s/it]g-point operations will not be computed-01 10:43:14,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 57/594 [07:53<1:24:38, 9.46s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:24,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 57/594 [07:53<1:24:38, 9.46s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:24,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:29,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:43:24,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 58/594 [08:03<1:25:15, 9.54s/it]g-point operations will not be computed-01 10:43:24,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 58/594 [08:03<1:25:15, 9.54s/it]g-point operations will not be computed-01 10:43:24,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 58/594 [08:03<1:25:15, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:34,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 58/594 [08:03<1:25:15, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:34,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:39,264 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:43:34,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:39,264 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:43:34,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 59/594 [08:13<1:25:39, 9.61s/it]g-point operations will not be computed-01 10:43:34,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 59/594 [08:13<1:25:39, 9.61s/it]g-point operations will not be computed-01 10:43:34,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 59/594 [08:13<1:25:39, 9.61s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:44,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 59/594 [08:13<1:25:39, 9.61s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:44,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:49,162 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:43:44,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 60/594 [08:23<1:26:08, 9.68s/it]g-point operations will not be computed-01 10:43:44,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 60/594 [08:23<1:26:08, 9.68s/it]g-point operations will not be computed-01 10:43:44,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 60/594 [08:23<1:26:08, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:54,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 60/594 [08:23<1:26:08, 9.68s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:43:54,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:43:58,809 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:43:54,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 61/594 [08:32<1:25:36, 9.64s/it]g-point operations will not be computed-01 10:43:54,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 61/594 [08:32<1:25:36, 9.64s/it]g-point operations will not be computed-01 10:43:54,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 61/594 [08:32<1:25:36, 9.64s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:03,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 61/594 [08:32<1:25:36, 9.64s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:03,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:08,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:03,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 62/594 [08:42<1:24:54, 9.58s/it]g-point operations will not be computed-01 10:44:03,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 62/594 [08:42<1:24:54, 9.58s/it]g-point operations will not be computed-01 10:44:03,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 62/594 [08:42<1:24:54, 9.58s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:12,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 62/594 [08:42<1:24:54, 9.58s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:12,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:17,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:12,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 63/594 [08:51<1:24:25, 9.54s/it]g-point operations will not be computed-01 10:44:12,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 63/594 [08:51<1:24:25, 9.54s/it]g-point operations will not be computed-01 10:44:12,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 63/594 [08:51<1:24:25, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:22,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 63/594 [08:51<1:24:25, 9.54s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:22,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:27,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:22,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:27,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:22,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 64/594 [09:01<1:23:56, 9.50s/it]g-point operations will not be computed-01 10:44:22,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 64/594 [09:01<1:23:56, 9.50s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:31,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 64/594 [09:01<1:23:56, 9.50s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:31,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:36,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:31,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 65/594 [09:10<1:23:13, 9.44s/it]g-point operations will not be computed-01 10:44:31,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 65/594 [09:10<1:23:13, 9.44s/it]g-point operations will not be computed-01 10:44:31,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 65/594 [09:10<1:23:13, 9.44s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:41,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 65/594 [09:10<1:23:13, 9.44s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:41,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:45,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:41,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:45,685 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:41,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 66/594 [09:19<1:22:41, 9.40s/it]g-point operations will not be computed-01 10:44:41,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 66/594 [09:19<1:22:41, 9.40s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:50,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 66/594 [09:19<1:22:41, 9.40s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:50,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:55,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:50,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:44:55,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:50,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 67/594 [09:29<1:22:12, 9.36s/it]g-point operations will not be computed-01 10:44:50,444 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 67/594 [09:29<1:22:12, 9.36s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:59,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████ | 67/594 [09:29<1:22:12, 9.36s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:44:59,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:45:04,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:59,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:45:04,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:44:59,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 68/594 [09:38<1:21:43, 9.32s/it]g-point operations will not be computed-01 10:44:59,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 68/594 [09:38<1:21:43, 9.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:08,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 68/594 [09:38<1:21:43, 9.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:08,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:45:13,399 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:08,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 69/594 [09:47<1:20:57, 9.25s/it]g-point operations will not be computed-01 10:45:08,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 69/594 [09:47<1:20:57, 9.25s/it]g-point operations will not be computed-01 10:45:08,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 69/594 [09:47<1:20:57, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:17,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▎ | 69/594 [09:47<1:20:57, 9.25s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:17,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:45:22,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:17,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:45:22,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:17,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 70/594 [09:56<1:20:25, 9.21s/it]g-point operations will not be computed-01 10:45:17,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 70/594 [09:56<1:20:25, 9.21s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:27,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 70/594 [09:56<1:20:25, 9.21s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:27,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:45:31,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:27,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 71/594 [10:05<1:19:46, 9.15s/it]g-point operations will not be computed-01 10:45:27,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 71/594 [10:05<1:19:46, 9.15s/it]g-point operations will not be computed-01 10:45:27,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 71/594 [10:05<1:19:46, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 71/594 [10:05<1:19:46, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 71/594 [10:05<1:19:46, 9.15s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 72/594 [10:14<1:18:50, 9.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 72/594 [10:14<1:18:50, 9.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4206, 'learning_rate': 4.14e-06, 'epoch': 0.12} + 12%|█████████▋ | 72/594 [10:14<1:18:50, 9.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 72/594 [10:14<1:18:50, 9.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 73/594 [10:23<1:17:52, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 73/594 [10:23<1:17:52, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2009, 'learning_rate': 4.2000000000000004e-06, 'epoch': 0.12} + 12%|█████████▊ | 73/594 [10:23<1:17:52, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 73/594 [10:23<1:17:52, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 73/594 [10:23<1:17:52, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 74/594 [10:31<1:17:03, 8.89s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 74/594 [10:31<1:17:03, 8.89s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 74/594 [10:31<1:17:03, 8.89s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 74/594 [10:31<1:17:03, 8.89s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 74/594 [10:31<1:17:03, 8.89s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 75/594 [10:41<1:17:35, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 75/594 [10:41<1:17:35, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 75/594 [10:41<1:17:35, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 75/594 [10:41<1:17:35, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 75/594 [10:41<1:17:35, 8.97s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 76/594 [10:49<1:16:26, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 76/594 [10:49<1:16:26, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 76/594 [10:49<1:16:26, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 76/594 [10:49<1:16:26, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 76/594 [10:49<1:16:26, 8.86s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 77/594 [10:58<1:15:19, 8.74s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 77/594 [10:58<1:15:19, 8.74s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 77/594 [10:58<1:15:19, 8.74s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 77/594 [10:58<1:15:19, 8.74s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 77/594 [10:58<1:15:19, 8.74s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 78/594 [11:06<1:14:24, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 78/594 [11:06<1:14:24, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 78/594 [11:06<1:14:24, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 78/594 [11:06<1:14:24, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 78/594 [11:06<1:14:24, 8.65s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 79/594 [11:14<1:13:24, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 79/594 [11:14<1:13:24, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 79/594 [11:14<1:13:24, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 79/594 [11:14<1:13:24, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▋ | 79/594 [11:14<1:13:24, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 80/594 [11:23<1:12:39, 8.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 80/594 [11:23<1:12:39, 8.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 80/594 [11:23<1:12:39, 8.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 80/594 [11:23<1:12:39, 8.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 80/594 [11:23<1:12:39, 8.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 81/594 [11:31<1:11:44, 8.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 81/594 [11:31<1:11:44, 8.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 81/594 [11:31<1:11:44, 8.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 81/594 [11:31<1:11:44, 8.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 81/594 [11:31<1:11:44, 8.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 82/594 [11:39<1:11:06, 8.33s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 82/594 [11:39<1:11:06, 8.33s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 82/594 [11:39<1:11:06, 8.33s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 82/594 [11:39<1:11:06, 8.33s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 82/594 [11:39<1:11:06, 8.33s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 83/594 [11:47<1:10:19, 8.26s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 83/594 [11:47<1:10:19, 8.26s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 83/594 [11:47<1:10:19, 8.26s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 83/594 [11:47<1:10:19, 8.26s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 83/594 [11:47<1:10:19, 8.26s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 84/594 [11:55<1:08:55, 8.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 84/594 [11:55<1:08:55, 8.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 84/594 [11:55<1:08:55, 8.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 84/594 [11:55<1:08:55, 8.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 84/594 [11:55<1:08:55, 8.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 85/594 [12:03<1:07:48, 7.99s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 85/594 [12:03<1:07:48, 7.99s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 85/594 [12:03<1:07:48, 7.99s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 85/594 [12:03<1:07:48, 7.99s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▍ | 85/594 [12:03<1:07:48, 7.99s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 86/594 [12:10<1:06:44, 7.88s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 86/594 [12:10<1:06:44, 7.88s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:47:44,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:47:44,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 87/594 [12:18<1:05:15, 7.72s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 87/594 [12:18<1:05:15, 7.72s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 87/594 [12:18<1:05:15, 7.72s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 87/594 [12:18<1:05:15, 7.72s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 87/594 [12:18<1:05:15, 7.72s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 88/594 [12:25<1:03:37, 7.54s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 88/594 [12:25<1:03:37, 7.54s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:47:58,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:47:58,600 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 89/594 [12:32<1:01:52, 7.35s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 89/594 [12:32<1:01:52, 7.35s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 89/594 [12:32<1:01:52, 7.35s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:06,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:06,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4134, 'learning_rate': 5.22e-06, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:11,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▌ | 91/594 [12:44<56:59, 6.80s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▌ | 91/594 [12:44<56:59, 6.80s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3388, 'learning_rate': 5.279999999999999e-06, 'epoch': 0.15} + 15%|████████████▌ | 91/594 [12:44<56:59, 6.80s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▌ | 91/594 [12:44<56:59, 6.80s/it]g-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:18,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:18,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:22,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:22,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:45:36,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 93/594 [12:55<50:47, 6.08s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 93/594 [12:55<50:47, 6.08s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 93/594 [12:55<50:47, 6.08s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:28,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:30,897 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:33,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:33,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:35,096 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:37,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:37,014 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:38,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:38,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:40,645 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:43,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:43,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:45,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:45,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:47,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:49,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:49,516 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3026, 'learning_rate': 5.82e-06, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:54,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:48:54,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:49:00,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:49:00,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4187, 'learning_rate': 5.8800000000000005e-06, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-01 10:49:00,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:49:00,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:49:00,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 102/594 [13:41<56:51, 6.93s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 102/594 [13:41<56:51, 6.93s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.232, 'learning_rate': 5.940000000000001e-06, 'epoch': 0.17} + 17%|█████████████▉ | 102/594 [13:41<56:51, 6.93s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 102/594 [13:41<56:51, 6.93s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 103/594 [13:51<1:03:57, 7.82s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 103/594 [13:51<1:03:57, 7.82s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2246, 'learning_rate': 6e-06, 'epoch': 0.17} + 17%|█████████████▋ | 103/594 [13:51<1:03:57, 7.82s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 103/594 [13:51<1:03:57, 7.82s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 104/594 [14:01<1:08:52, 8.43s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 104/594 [14:01<1:08:52, 8.43s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1768, 'learning_rate': 6.0600000000000004e-06, 'epoch': 0.17} + 18%|█████████████▊ | 104/594 [14:01<1:08:52, 8.43s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 104/594 [14:01<1:08:52, 8.43s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2036, 'learning_rate': 6.12e-06, 'epoch': 0.18} + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2184, 'learning_rate': 6.18e-06, 'epoch': 0.18} + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 105/594 [14:11<1:11:59, 8.83s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 107/594 [14:30<1:15:22, 9.29s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 107/594 [14:30<1:15:22, 9.29s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 107/594 [14:30<1:15:22, 9.29s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 107/594 [14:30<1:15:22, 9.29s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 107/594 [14:30<1:15:22, 9.29s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 108/594 [14:40<1:16:06, 9.40s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 108/594 [14:40<1:16:06, 9.40s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 108/594 [14:40<1:16:06, 9.40s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 108/594 [14:40<1:16:06, 9.40s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 108/594 [14:40<1:16:06, 9.40s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 109/594 [14:50<1:16:26, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 109/594 [14:50<1:16:26, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 109/594 [14:50<1:16:26, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 109/594 [14:50<1:16:26, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 109/594 [14:50<1:16:26, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 110/594 [14:59<1:16:46, 9.52s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 110/594 [14:59<1:16:46, 9.52s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 110/594 [14:59<1:16:46, 9.52s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 110/594 [14:59<1:16:46, 9.52s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 110/594 [14:59<1:16:46, 9.52s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|█���████████████▊ | 111/594 [15:09<1:16:31, 9.51s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 111/594 [15:09<1:16:31, 9.51s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 111/594 [15:09<1:16:31, 9.51s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 111/594 [15:09<1:16:31, 9.51s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 112/594 [15:18<1:16:01, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 112/594 [15:18<1:16:01, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3249, 'learning_rate': 6.54e-06, 'epoch': 0.19} + 19%|██████████████▉ | 112/594 [15:18<1:16:01, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 112/594 [15:18<1:16:01, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 112/594 [15:18<1:16:01, 9.46s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 113/594 [15:27<1:15:31, 9.42s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 113/594 [15:27<1:15:31, 9.42s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 113/594 [15:27<1:15:31, 9.42s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 113/594 [15:27<1:15:31, 9.42s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3379, 'learning_rate': 6.660000000000001e-06, 'epoch': 0.19} + g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 115/594 [15:46<1:14:44, 9.36s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 115/594 [15:46<1:14:44, 9.36s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 115/594 [15:46<1:14:44, 9.36s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 115/594 [15:46<1:14:44, 9.36s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 116/594 [15:55<1:14:11, 9.31s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 116/594 [15:55<1:14:11, 9.31s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2865, 'learning_rate': 6.78e-06, 'epoch': 0.2} + 20%|███████████████▍ | 116/594 [15:55<1:14:11, 9.31s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 116/594 [15:55<1:14:11, 9.31s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 116/594 [15:55<1:14:11, 9.31s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 117/594 [16:04<1:13:41, 9.27s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 117/594 [16:04<1:13:41, 9.27s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 117/594 [16:04<1:13:41, 9.27s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 117/594 [16:04<1:13:41, 9.27s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 118/594 [16:13<1:13:06, 9.22s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 118/594 [16:13<1:13:06, 9.22s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1678, 'learning_rate': 6.900000000000001e-06, 'epoch': 0.2} + 20%|███████████████▋ | 118/594 [16:13<1:13:06, 9.22s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 118/594 [16:13<1:13:06, 9.22s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 118/594 [16:13<1:13:06, 9.22s/it]g-point operations will not be computed-01 10:48:25,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 119/594 [16:23<1:12:29, 9.16s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 119/594 [16:23<1:12:29, 9.16s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▊ | 119/594 [16:23<1:12:29, 9.16s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 120/594 [16:31<1:11:56, 9.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 120/594 [16:31<1:11:56, 9.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1488, 'learning_rate': 7.0200000000000006e-06, 'epoch': 0.2} + 20%|███████████████▉ | 120/594 [16:31<1:11:56, 9.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 120/594 [16:31<1:11:56, 9.11s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 121/594 [16:40<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 121/594 [16:40<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3728, 'learning_rate': 7.08e-06, 'epoch': 0.2} + 20%|████████████████ | 121/594 [16:40<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 121/594 [16:40<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 121/594 [16:40<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 121/594 [16:40<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████ | 121/594 [16:40<1:11:19, 9.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:22,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:22,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:22,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:22,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2362, 'learning_rate': 7.2e-06, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2868, 'learning_rate': 7.26e-06, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:52:31,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 125/594 [17:16<1:09:24, 8.88s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 125/594 [17:16<1:09:24, 8.88s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 125/594 [17:16<1:09:24, 8.88s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 125/594 [17:16<1:09:24, 8.88s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 125/594 [17:16<1:09:24, 8.88s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 126/594 [17:24<1:08:31, 8.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 126/594 [17:24<1:08:31, 8.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 126/594 [17:24<1:08:31, 8.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 126/594 [17:24<1:08:31, 8.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 127/594 [17:33<1:07:42, 8.70s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 127/594 [17:33<1:07:42, 8.70s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1797, 'learning_rate': 7.44e-06, 'epoch': 0.21} + 21%|████████████████▉ | 127/594 [17:33<1:07:42, 8.70s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 127/594 [17:33<1:07:42, 8.70s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 128/594 [17:41<1:06:35, 8.57s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 128/594 [17:41<1:06:35, 8.57s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1409, 'learning_rate': 7.5e-06, 'epoch': 0.22} + 22%|█████████████████ | 128/594 [17:41<1:06:35, 8.57s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 128/594 [17:41<1:06:35, 8.57s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 129/594 [17:49<1:05:37, 8.47s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 129/594 [17:49<1:05:37, 8.47s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2855, 'learning_rate': 7.5600000000000005e-06, 'epoch': 0.22} + 22%|█████████████████▏ | 129/594 [17:49<1:05:37, 8.47s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 129/594 [17:49<1:05:37, 8.47s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 130/594 [17:57<1:04:46, 8.38s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 130/594 [17:57<1:04:46, 8.38s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3087, 'learning_rate': 7.62e-06, 'epoch': 0.22} + 22%|█████████████████▎ | 130/594 [17:57<1:04:46, 8.38s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 130/594 [17:57<1:04:46, 8.38s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 130/594 [17:57<1:04:46, 8.38s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 131/594 [18:05<1:04:06, 8.31s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 131/594 [18:05<1:04:06, 8.31s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 131/594 [18:05<1:04:06, 8.31s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 131/594 [18:05<1:04:06, 8.31s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 131/594 [18:05<1:04:06, 8.31s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 132/594 [18:14<1:03:33, 8.26s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 132/594 [18:14<1:03:33, 8.26s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 132/594 [18:14<1:03:33, 8.26s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 132/594 [18:14<1:03:33, 8.26s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 132/594 [18:14<1:03:33, 8.26s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 133/594 [18:21<1:02:24, 8.12s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 133/594 [18:21<1:02:24, 8.12s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 133/594 [18:21<1:02:24, 8.12s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 133/594 [18:21<1:02:24, 8.12s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 133/594 [18:21<1:02:24, 8.12s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 134/594 [18:29<1:01:15, 7.99s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 134/594 [18:29<1:01:15, 7.99s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 134/594 [18:29<1:01:15, 7.99s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 134/594 [18:29<1:01:15, 7.99s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 134/594 [18:29<1:01:15, 7.99s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 135/594 [18:37<1:00:22, 7.89s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 135/594 [18:37<1:00:22, 7.89s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:11,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:11,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 136/594 [18:44<59:26, 7.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 136/594 [18:44<59:26, 7.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 136/594 [18:44<59:26, 7.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 136/594 [18:44<59:26, 7.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 136/594 [18:44<59:26, 7.79s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 137/594 [18:52<58:16, 7.65s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 137/594 [18:52<58:16, 7.65s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 137/594 [18:52<58:16, 7.65s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 137/594 [18:52<58:16, 7.65s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▋ | 137/594 [18:52<58:16, 7.65s/it]g-point operations will not be computed-01 10:51:53,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 138/594 [18:59<57:05, 7.51s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 138/594 [18:59<57:05, 7.51s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 138/594 [18:59<57:05, 7.51s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▊ | 138/594 [18:59<57:05, 7.51s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▉ | 139/594 [19:06<55:49, 7.36s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▉ | 139/594 [19:06<55:49, 7.36s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:39,696 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 140/594 [19:13<54:18, 7.18s/it]g-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 140/594 [19:13<54:18, 7.18s/it]g-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4473, 'learning_rate': 8.220000000000001e-06, 'epoch': 0.24} + 24%|███████████████████ | 140/594 [19:13<54:18, 7.18s/it]g-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:47,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:47,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3516, 'learning_rate': 8.28e-06, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:47,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:53,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:53,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4041, 'learning_rate': 8.340000000000001e-06, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:57,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:54:57,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 143/594 [19:30<47:19, 6.30s/it]g-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:01,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:04,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:04,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2525, 'learning_rate': 8.459999999999999e-06, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:07,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:07,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:54:29,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▊ | 145/594 [19:40<41:09, 5.50s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:55:09,850 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:11,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:09,850 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:11,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:09,850 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 146/594 [19:44<38:02, 5.10s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:55:13,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 147/594 [19:48<34:35, 4.64s/it]g-point operations will not be computed-01 10:55:13,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 147/594 [19:48<34:35, 4.64s/it]g-point operations will not be computed-01 10:55:13,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 147/594 [19:48<34:35, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:55:17,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 147/594 [19:48<34:35, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:55:17,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 148/594 [19:51<31:03, 4.18s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:55:20,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 149/594 [19:54<27:52, 3.76s/it]g-point operations will not be computed-01 10:55:20,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 149/594 [19:54<27:52, 3.76s/it]g-point operations will not be computed-01 10:55:20,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:24,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:22,990 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:24,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:22,990 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 150/594 [19:56<25:52, 3.50s/it]g-point operations will not be computed-01 10:55:22,990 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 150/594 [19:56<25:52, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 150/594 [19:56<25:52, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:33,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:55:33,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▌ | 151/594 [20:07<41:12, 5.58s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▌ | 151/594 [20:07<41:12, 5.58s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▌ | 151/594 [20:07<41:12, 5.58s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▌ | 151/594 [20:07<41:12, 5.58s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▌ | 151/594 [20:07<41:12, 5.58s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 152/594 [20:17<50:53, 6.91s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 152/594 [20:17<50:53, 6.91s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 152/594 [20:17<50:53, 6.91s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 152/594 [20:17<50:53, 6.91s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 152/594 [20:17<50:53, 6.91s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 153/594 [20:27<57:32, 7.83s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 153/594 [20:27<57:32, 7.83s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 153/594 [20:27<57:32, 7.83s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 153/594 [20:27<57:32, 7.83s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 154/594 [20:37<1:01:47, 8.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 154/594 [20:37<1:01:47, 8.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0987, 'learning_rate': 9.06e-06, 'epoch': 0.26} + 26%|████████████████████▍ | 154/594 [20:37<1:01:47, 8.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 154/594 [20:37<1:01:47, 8.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 155/594 [20:46<1:04:30, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 155/594 [20:46<1:04:30, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1229, 'learning_rate': 9.12e-06, 'epoch': 0.26} + 26%|████████████████████▌ | 155/594 [20:46<1:04:30, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 155/594 [20:46<1:04:30, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 155/594 [20:46<1:04:30, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 156/594 [20:56<1:06:14, 9.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 156/594 [20:56<1:06:14, 9.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 156/594 [20:56<1:06:14, 9.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 156/594 [20:56<1:06:14, 9.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▉ | 157/594 [21:06<1:07:22, 9.25s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▉ | 157/594 [21:06<1:07:22, 9.25s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2867, 'learning_rate': 9.24e-06, 'epoch': 0.26} + 26%|████████████████████▉ | 157/594 [21:06<1:07:22, 9.25s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▉ | 157/594 [21:06<1:07:22, 9.25s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 158/594 [21:15<1:08:02, 9.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 158/594 [21:15<1:08:02, 9.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1166, 'learning_rate': 9.3e-06, 'epoch': 0.27} + 27%|█████████████████████ | 158/594 [21:15<1:08:02, 9.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 158/594 [21:15<1:08:02, 9.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 159/594 [21:25<1:08:20, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 159/594 [21:25<1:08:20, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0816, 'learning_rate': 9.36e-06, 'epoch': 0.27} + 27%|█████████████████████▏ | 159/594 [21:25<1:08:20, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 159/594 [21:25<1:08:20, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 160/594 [21:35<1:08:40, 9.49s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 160/594 [21:35<1:08:40, 9.49s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1966, 'learning_rate': 9.42e-06, 'epoch': 0.27} + 27%|█████████████████████▎ | 160/594 [21:35<1:08:40, 9.49s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 160/594 [21:35<1:08:40, 9.49s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 160/594 [21:35<1:08:40, 9.49s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 161/594 [21:44<1:08:26, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 161/594 [21:44<1:08:26, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 161/594 [21:44<1:08:26, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 161/594 [21:44<1:08:26, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1073, 'learning_rate': 9.54e-06, 'epoch': 0.27} + g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 163/594 [22:03<1:08:05, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 163/594 [22:03<1:08:05, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 163/594 [22:03<1:08:05, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 163/594 [22:03<1:08:05, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 163/594 [22:03<1:08:05, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▊ | 164/594 [22:12<1:07:50, 9.47s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▊ | 164/594 [22:12<1:07:50, 9.47s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▊ | 164/594 [22:12<1:07:50, 9.47s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▊ | 164/594 [22:12<1:07:50, 9.47s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▉ | 165/594 [22:22<1:07:23, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▉ | 165/594 [22:22<1:07:23, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2105, 'learning_rate': 9.72e-06, 'epoch': 0.28} + 28%|█████████████████████▉ | 165/594 [22:22<1:07:23, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▉ | 165/594 [22:22<1:07:23, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▉ | 165/594 [22:22<1:07:23, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 166/594 [22:31<1:07:05, 9.41s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 166/594 [22:31<1:07:05, 9.41s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 166/594 [22:31<1:07:05, 9.41s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 166/594 [22:31<1:07:05, 9.41s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 167/594 [22:40<1:06:40, 9.37s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 167/594 [22:40<1:06:40, 9.37s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1639, 'learning_rate': 9.84e-06, 'epoch': 0.28} + 28%|██████████████████████▏ | 167/594 [22:40<1:06:40, 9.37s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 167/594 [22:40<1:06:40, 9.37s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▏ | 167/594 [22:40<1:06:40, 9.37s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 168/594 [22:50<1:06:19, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|████████���█████████████▎ | 168/594 [22:50<1:06:19, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 168/594 [22:50<1:06:19, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 168/594 [22:50<1:06:19, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 169/594 [22:59<1:06:08, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 169/594 [22:59<1:06:08, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2094, 'learning_rate': 9.960000000000001e-06, 'epoch': 0.28} + 28%|██████████████████████▍ | 169/594 [22:59<1:06:08, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 169/594 [22:59<1:06:08, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▍ | 169/594 [22:59<1:06:08, 9.34s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▌ | 170/594 [23:08<1:05:43, 9.30s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▌ | 170/594 [23:08<1:05:43, 9.30s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▌ | 170/594 [23:08<1:05:43, 9.30s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▌ | 170/594 [23:08<1:05:43, 9.30s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▋ | 171/594 [23:17<1:05:16, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▋ | 171/594 [23:17<1:05:16, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1226, 'learning_rate': 1.008e-05, 'epoch': 0.29} + 29%|██████████████████████▋ | 171/594 [23:17<1:05:16, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▋ | 171/594 [23:17<1:05:16, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▋ | 171/594 [23:17<1:05:16, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▋ | 171/594 [23:17<1:05:16, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1309, 'learning_rate': 1.0140000000000001e-05, 'epoch': 0.29} + 29%|██████████████████████▋ | 171/594 [23:17<1:05:16, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:59:01,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 10:59:01,845 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 173/594 [23:35<1:03:53, 9.11s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 173/594 [23:35<1:03:53, 9.11s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 173/594 [23:35<1:03:53, 9.11s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 173/594 [23:35<1:03:53, 9.11s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 173/594 [23:35<1:03:53, 9.11s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|��██████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2783, 'learning_rate': 1.032e-05, 'epoch': 0.29} + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 174/594 [23:44<1:02:51, 8.98s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 176/594 [24:02<1:01:59, 8.90s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 176/594 [24:02<1:01:59, 8.90s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 176/594 [24:02<1:01:59, 8.90s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 176/594 [24:02<1:01:59, 8.90s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 176/594 [24:02<1:01:59, 8.90s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 177/594 [24:10<1:00:54, 8.76s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 177/594 [24:10<1:00:54, 8.76s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 177/594 [24:10<1:00:54, 8.76s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 177/594 [24:10<1:00:54, 8.76s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 178/594 [24:19<59:59, 8.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 178/594 [24:19<59:59, 8.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1372, 'learning_rate': 1.05e-05, 'epoch': 0.3} + 30%|████████████████████████▎ | 178/594 [24:19<59:59, 8.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 178/594 [24:19<59:59, 8.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 178/594 [24:19<59:59, 8.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▍ | 179/594 [24:27<59:15, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▍ | 179/594 [24:27<59:15, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▍ | 179/594 [24:27<59:15, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▍ | 179/594 [24:27<59:15, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▍ | 179/594 [24:27<59:15, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 180/594 [24:35<58:32, 8.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 180/594 [24:35<58:32, 8.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 180/594 [24:35<58:32, 8.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 180/594 [24:35<58:32, 8.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 180/594 [24:35<58:32, 8.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▋ | 181/594 [24:43<57:43, 8.39s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▋ | 181/594 [24:43<57:43, 8.39s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▋ | 181/594 [24:43<57:43, 8.39s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▋ | 181/594 [24:43<57:43, 8.39s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▋ | 181/594 [24:43<57:43, 8.39s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 182/594 [24:51<56:56, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 182/594 [24:51<56:56, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 182/594 [24:51<56:56, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 182/594 [24:51<56:56, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▊ | 182/594 [24:51<56:56, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▉ | 183/594 [24:59<56:13, 8.21s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▉ | 183/594 [24:59<56:13, 8.21s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▉ | 183/594 [24:59<56:13, 8.21s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▉ | 183/594 [24:59<56:13, 8.21s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▉ | 183/594 [24:59<56:13, 8.21s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████ | 184/594 [25:07<55:09, 8.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████ | 184/594 [25:07<55:09, 8.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|██████████████��██████████ | 184/594 [25:07<55:09, 8.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████ | 184/594 [25:07<55:09, 8.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████ | 184/594 [25:07<55:09, 8.07s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 185/594 [25:15<54:08, 7.94s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 185/594 [25:15<54:08, 7.94s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 185/594 [25:15<54:08, 7.94s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:00:51,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:00:51,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2284, 'learning_rate': 1.098e-05, 'epoch': 0.31} +[WARNING|modeling_utils.py:388] 2022-03-01 11:00:51,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:00:51,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:00:51,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 187/594 [25:30<52:09, 7.69s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 187/594 [25:30<52:09, 7.69s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 187/594 [25:30<52:09, 7.69s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 187/594 [25:30<52:09, 7.69s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 187/594 [25:30<52:09, 7.69s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████��███████████▋ | 188/594 [25:37<51:02, 7.54s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:09,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:09,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:09,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 189/594 [25:44<49:38, 7.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 189/594 [25:44<49:38, 7.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 189/594 [25:44<49:38, 7.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:19,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:19,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1684, 'learning_rate': 1.1220000000000001e-05, 'epoch': 0.32} +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:19,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:25,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:25,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4552, 'learning_rate': 1.128e-05, 'epoch': 0.32} +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:25,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:31,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:31,594 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3806, 'learning_rate': 1.134e-05, 'epoch': 0.32} +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:35,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:35,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▎ | 193/594 [26:08<41:44, 6.25s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:39,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:42,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:42,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3502, 'learning_rate': 1.146e-05, 'epoch': 0.33} +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:45,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:45,882 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▌ | 195/594 [26:18<37:15, 5.60s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:49,329 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:49,329 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:51,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:53,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:53,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:55,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:58,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:58,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:59,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:01:59,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:02,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:04,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:04,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6242, 'learning_rate': 1.182e-05, 'epoch': 0.34} +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:09,917 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:09,917 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:09,917 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:15,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:15,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:15,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:15,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:02:15,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 202/594 [26:57<47:18, 7.24s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 202/594 [26:57<47:18, 7.24s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 202/594 [26:57<47:18, 7.24s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 202/594 [26:57<47:18, 7.24s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▌ | 202/594 [26:57<47:18, 7.24s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 203/594 [27:07<52:47, 8.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 203/594 [27:07<52:47, 8.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 203/594 [27:07<52:47, 8.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 203/594 [27:07<52:47, 8.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▋ | 203/594 [27:07<52:47, 8.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 204/594 [27:17<56:44, 8.73s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 204/594 [27:17<56:44, 8.73s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 204/594 [27:17<56:44, 8.73s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 204/594 [27:17<56:44, 8.73s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 204/594 [27:17<56:44, 8.73s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 205/594 [27:27<59:18, 9.15s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 205/594 [27:27<59:18, 9.15s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 205/594 [27:27<59:18, 9.15s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 205/594 [27:27<59:18, 9.15s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 206/594 [27:38<1:00:58, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 206/594 [27:38<1:00:58, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1126, 'learning_rate': 1.2180000000000002e-05, 'epoch': 0.35} + 35%|███████████████████████████▍ | 206/594 [27:38<1:00:58, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 206/594 [27:38<1:00:58, 9.43s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 207/594 [27:47<1:01:19, 9.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 207/594 [27:47<1:01:19, 9.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1722, 'learning_rate': 1.224e-05, 'epoch': 0.35} + 35%|███████████████████████████▌ | 207/594 [27:47<1:01:19, 9.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▌ | 207/594 [27:47<1:01:19, 9.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 208/594 [27:57<1:01:51, 9.61s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 208/594 [27:57<1:01:51, 9.61s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1502, 'learning_rate': 1.2299999999999999e-05, 'epoch': 0.35} + 35%|███████████████████████████▋ | 208/594 [27:57<1:01:51, 9.61s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▋ | 208/594 [27:57<1:01:51, 9.61s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 209/594 [28:07<1:02:07, 9.68s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 209/594 [28:07<1:02:07, 9.68s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1738, 'learning_rate': 1.236e-05, 'epoch': 0.35} + 35%|███████████████████████████▊ | 209/594 [28:07<1:02:07, 9.68s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 209/594 [28:07<1:02:07, 9.68s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▊ | 209/594 [28:07<1:02:07, 9.68s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 210/594 [28:16<1:01:33, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 210/594 [28:16<1:01:33, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 210/594 [28:16<1:01:33, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▉ | 210/594 [28:16<1:01:33, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████ | 211/594 [28:26<1:01:28, 9.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████ | 211/594 [28:26<1:01:28, 9.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.264, 'learning_rate': 1.2479999999999999e-05, 'epoch': 0.35} + 36%|████████████████████████████ | 211/594 [28:26<1:01:28, 9.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████ | 211/594 [28:26<1:01:28, 9.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████ | 211/594 [28:26<1:01:28, 9.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▏ | 212/594 [28:36<1:01:16, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▏ | 212/594 [28:36<1:01:16, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▏ | 212/594 [28:36<1:01:16, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▏ | 212/594 [28:36<1:01:16, 9.62s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▎ | 213/594 [28:45<1:00:45, 9.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▎ | 213/594 [28:45<1:00:45, 9.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2049, 'learning_rate': 1.26e-05, 'epoch': 0.36} + 36%|████████████████████████████▎ | 213/594 [28:45<1:00:45, 9.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▎ | 213/594 [28:45<1:00:45, 9.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▎ | 213/594 [28:45<1:00:45, 9.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 214/594 [28:54<1:00:01, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 214/594 [28:54<1:00:01, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 214/594 [28:54<1:00:01, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 214/594 [28:54<1:00:01, 9.48s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 215/594 [29:04<59:20, 9.40s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 215/594 [29:04<59:20, 9.40s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1661, 'learning_rate': 1.272e-05, 'epoch': 0.36} + 36%|█████████████████████████████▎ | 215/594 [29:04<59:20, 9.40s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 215/594 [29:04<59:20, 9.40s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 215/594 [29:04<59:20, 9.40s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 216/594 [29:13<58:40, 9.31s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 216/594 [29:13<58:40, 9.31s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 216/594 [29:13<58:40, 9.31s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 216/594 [29:13<58:40, 9.31s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 217/594 [29:22<58:09, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 217/594 [29:22<58:09, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0784, 'learning_rate': 1.284e-05, 'epoch': 0.36} + 37%|█████████████████████████████▌ | 217/594 [29:22<58:09, 9.26s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:04:59,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:04:59,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1899, 'learning_rate': 1.29e-05, 'epoch': 0.37} +[WARNING|modeling_utils.py:388] 2022-03-01 11:04:59,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:04:59,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:04:59,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 219/594 [29:40<56:53, 9.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 219/594 [29:40<56:53, 9.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1644, 'learning_rate': 1.296e-05, 'epoch': 0.37} + 37%|█████████████████████████████▊ | 219/594 [29:40<56:53, 9.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 219/594 [29:40<56:53, 9.10s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 220/594 [29:49<56:17, 9.03s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 220/594 [29:49<56:17, 9.03s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2537, 'learning_rate': 1.302e-05, 'epoch': 0.37} + 37%|██████████████████████████████ | 220/594 [29:49<56:17, 9.03s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 220/594 [29:49<56:17, 9.03s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 220/594 [29:49<56:17, 9.03s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▏ | 221/594 [29:57<55:54, 8.99s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:05:30,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:05:30,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 222/594 [30:06<55:25, 8.94s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 222/594 [30:06<55:25, 8.94s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2814, 'learning_rate': 1.314e-05, 'epoch': 0.37} + 37%|██████████████████████████████▎ | 222/594 [30:06<55:25, 8.94s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 222/594 [30:06<55:25, 8.94s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 223/594 [30:15<54:51, 8.87s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 223/594 [30:15<54:51, 8.87s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1264, 'learning_rate': 1.32e-05, 'epoch': 0.37} + 38%|██████████████████████████████▍ | 223/594 [30:15<54:51, 8.87s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 223/594 [30:15<54:51, 8.87s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 224/594 [30:24<54:22, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 224/594 [30:24<54:22, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2383, 'learning_rate': 1.326e-05, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 224/594 [30:24<54:22, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 224/594 [30:24<54:22, 8.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 225/594 [30:33<54:35, 8.88s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 225/594 [30:33<54:35, 8.88s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4086, 'learning_rate': 1.3320000000000001e-05, 'epoch': 0.38} + 38%|██████████████████████████████▋ | 225/594 [30:33<54:35, 8.88s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 225/594 [30:33<54:35, 8.88s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 226/594 [30:41<53:40, 8.75s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 226/594 [30:41<53:40, 8.75s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1876, 'learning_rate': 1.338e-05, 'epoch': 0.38} + 38%|██████████████████████████████▊ | 226/594 [30:41<53:40, 8.75s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 226/594 [30:41<53:40, 8.75s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▉ | 227/594 [30:50<52:48, 8.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▉ | 227/594 [30:50<52:48, 8.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1319, 'learning_rate': 1.344e-05, 'epoch': 0.38} + 38%|██████████████████████████████▉ | 227/594 [30:50<52:48, 8.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▉ | 227/594 [30:50<52:48, 8.63s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 228/594 [30:58<52:16, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 228/594 [30:58<52:16, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1903, 'learning_rate': 1.3500000000000001e-05, 'epoch': 0.38} + 38%|███████████████████████████████ | 228/594 [30:58<52:16, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 228/594 [30:58<52:16, 8.57s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▏ | 229/594 [31:06<51:44, 8.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▏ | 229/594 [31:06<51:44, 8.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3229, 'learning_rate': 1.356e-05, 'epoch': 0.39} + 39%|███████████████████████████████▏ | 229/594 [31:06<51:44, 8.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▏ | 229/594 [31:06<51:44, 8.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▏ | 229/594 [31:06<51:44, 8.51s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 230/594 [31:15<51:10, 8.44s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 230/594 [31:15<51:10, 8.44s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 230/594 [31:15<51:10, 8.44s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 230/594 [31:15<51:10, 8.44s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 230/594 [31:15<51:10, 8.44s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 231/594 [31:23<50:34, 8.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 231/594 [31:23<50:34, 8.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 231/594 [31:23<50:34, 8.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 231/594 [31:23<50:34, 8.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 231/594 [31:23<50:34, 8.36s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 232/594 [31:31<50:00, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 232/594 [31:31<50:00, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 232/594 [31:31<50:00, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 232/594 [31:31<50:00, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 232/594 [31:31<50:00, 8.29s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▊ | 233/594 [31:39<49:17, 8.19s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▊ | 233/594 [31:39<49:17, 8.19s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▊ | 233/594 [31:39<49:17, 8.19s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▊ | 233/594 [31:39<49:17, 8.19s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████���████████████▊ | 233/594 [31:39<49:17, 8.19s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 234/594 [31:47<48:30, 8.09s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 234/594 [31:47<48:30, 8.09s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 234/594 [31:47<48:30, 8.09s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 234/594 [31:47<48:30, 8.09s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 234/594 [31:47<48:30, 8.09s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 235/594 [31:54<47:37, 7.96s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 235/594 [31:54<47:37, 7.96s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 235/594 [31:54<47:37, 7.96s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 235/594 [31:54<47:37, 7.96s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 235/594 [31:54<47:37, 7.96s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 236/594 [32:02<46:38, 7.82s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:07:34,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:07:34,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:07:34,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 237/594 [32:09<45:30, 7.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 237/594 [32:09<45:30, 7.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 237/594 [32:09<45:30, 7.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 237/594 [32:09<45:30, 7.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 237/594 [32:09<45:30, 7.65s/it]g-point operations will not be computed-01 10:55:27,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 238/594 [32:16<44:23, 7.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 238/594 [32:16<44:23, 7.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 238/594 [32:16<44:23, 7.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 238/594 [32:16<44:23, 7.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 239/594 [32:23<43:20, 7.33s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 239/594 [32:23<43:20, 7.33s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:07:56,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:07:56,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 240/594 [32:30<42:01, 7.12s/it]g-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 240/594 [32:30<42:01, 7.12s/it]g-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:03,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:03,354 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 241/594 [32:36<40:21, 6.86s/it]g-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 241/594 [32:36<40:21, 6.86s/it]g-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:09,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:09,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 242/594 [32:42<38:31, 6.57s/it]g-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 242/594 [32:42<38:31, 6.57s/it]g-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:15,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:15,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 243/594 [32:48<36:46, 6.29s/it]g-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:19,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:21,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:21,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3072, 'learning_rate': 1.446e-05, 'epoch': 0.41} +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:25,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:25,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:07:46,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 245/594 [32:57<32:22, 5.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:08:27,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:29,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:27,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:29,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:27,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 246/594 [33:02<29:51, 5.15s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:08:31,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:33,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:31,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:33,204 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:31,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 247/594 [33:05<27:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:08:34,988 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▋ | 247/594 [33:05<27:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:08:34,988 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 248/594 [33:09<24:36, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:08:38,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:39,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:38,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:39,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:38,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:41,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:40,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:41,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:40,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 250/594 [33:14<20:16, 3.54s/it]g-point operations will not be computed-01 11:08:40,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 250/594 [33:14<20:16, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 250/594 [33:14<20:16, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:50,900 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:08:50,900 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▏ | 251/594 [33:25<32:06, 5.62s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▏ | 251/594 [33:25<32:06, 5.62s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▏ | 251/594 [33:25<32:06, 5.62s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▏ | 251/594 [33:25<32:06, 5.62s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▏ | 251/594 [33:25<32:06, 5.62s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 252/594 [33:35<39:43, 6.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 252/594 [33:35<39:43, 6.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 252/594 [33:35<39:43, 6.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 252/594 [33:35<39:43, 6.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 252/594 [33:35<39:43, 6.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 253/594 [33:45<44:33, 7.84s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 253/594 [33:45<44:33, 7.84s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 253/594 [33:45<44:33, 7.84s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 253/594 [33:45<44:33, 7.84s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▌ | 253/594 [33:45<44:33, 7.84s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2503, 'learning_rate': 1.5120000000000001e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 254/594 [33:55<47:53, 8.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 256/594 [34:14<51:31, 9.15s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 256/594 [34:14<51:31, 9.15s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1818, 'learning_rate': 1.518e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▉ | 256/594 [34:14<51:31, 9.15s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 256/594 [34:14<51:31, 9.15s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 256/594 [34:14<51:31, 9.15s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 257/594 [34:24<52:17, 9.31s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 257/594 [34:24<52:17, 9.31s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 257/594 [34:24<52:17, 9.31s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 257/594 [34:24<52:17, 9.31s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 257/594 [34:24<52:17, 9.31s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 258/594 [34:33<52:39, 9.40s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 258/594 [34:33<52:39, 9.40s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 258/594 [34:33<52:39, 9.40s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 258/594 [34:33<52:39, 9.40s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 259/594 [34:43<53:04, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 259/594 [34:43<53:04, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2211, 'learning_rate': 1.5360000000000002e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▎ | 259/594 [34:43<53:04, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▎ | 259/594 [34:43<53:04, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 260/594 [34:53<52:57, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████��██████████████████████████▍ | 260/594 [34:53<52:57, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2337, 'learning_rate': 1.542e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▍ | 260/594 [34:53<52:57, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 260/594 [34:53<52:57, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 260/594 [34:53<52:57, 9.51s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▌ | 261/594 [35:02<52:27, 9.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▌ | 261/594 [35:02<52:27, 9.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▌ | 261/594 [35:02<52:27, 9.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▌ | 261/594 [35:02<52:27, 9.45s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 262/594 [35:11<52:07, 9.42s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 262/594 [35:11<52:07, 9.42s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1336, 'learning_rate': 1.554e-05, 'epoch': 0.44} + 44%|███████████████████████████████████▋ | 262/594 [35:11<52:07, 9.42s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 262/594 [35:11<52:07, 9.42s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 263/594 [35:21<51:53, 9.41s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 263/594 [35:21<51:53, 9.41s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2485, 'learning_rate': 1.56e-05, 'epoch': 0.44} + 44%|███████���███████████████████████████▊ | 263/594 [35:21<51:53, 9.41s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▊ | 263/594 [35:21<51:53, 9.41s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 264/594 [35:30<51:28, 9.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 264/594 [35:30<51:28, 9.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 3.9808, 'learning_rate': 1.5660000000000003e-05, 'epoch': 0.44} + 44%|████████████████████████████████████ | 264/594 [35:30<51:28, 9.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 264/594 [35:30<51:28, 9.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 264/594 [35:30<51:28, 9.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 265/594 [35:39<51:07, 9.32s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 265/594 [35:39<51:07, 9.32s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 265/594 [35:39<51:07, 9.32s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 265/594 [35:39<51:07, 9.32s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▏ | 265/594 [35:39<51:07, 9.32s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 266/594 [35:48<50:40, 9.27s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 266/594 [35:48<50:40, 9.27s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 266/594 [35:48<50:40, 9.27s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 266/594 [35:48<50:40, 9.27s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▍ | 267/594 [35:58<50:16, 9.23s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▍ | 267/594 [35:58<50:16, 9.23s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:11:30,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:11:30,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:11:30,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 268/594 [36:07<49:49, 9.17s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 268/594 [36:07<49:49, 9.17s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 268/594 [36:07<49:49, 9.17s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 268/594 [36:07<49:49, 9.17s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 269/594 [36:16<49:30, 9.14s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 269/594 [36:16<49:30, 9.14s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2437, 'learning_rate': 1.596e-05, 'epoch': 0.45} + 45%|████████████████████████████████████▋ | 269/594 [36:16<49:30, 9.14s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 269/594 [36:16<49:30, 9.14s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|██████████████████████��█████████████▋ | 269/594 [36:16<49:30, 9.14s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▊ | 270/594 [36:25<49:12, 9.11s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:11:57,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:11:57,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:11:57,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 271/594 [36:34<48:44, 9.06s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 271/594 [36:34<48:44, 9.06s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 271/594 [36:34<48:44, 9.06s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 271/594 [36:34<48:44, 9.06s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 272/594 [36:42<48:12, 8.98s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 272/594 [36:42<48:12, 8.98s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3086, 'learning_rate': 1.614e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████ | 272/594 [36:42<48:12, 8.98s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 272/594 [36:42<48:12, 8.98s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 273/594 [36:51<47:42, 8.92s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▏ | 273/594 [36:51<47:42, 8.92s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.165, 'learning_rate': 1.62e-05, 'epoch': 0.46} +[WARNING|modeling_utils.py:388] 2022-03-01 11:12:26,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 274/594 [37:00<47:23, 8.89s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 274/594 [37:00<47:23, 8.89s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1709, 'learning_rate': 1.626e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████▎ | 274/594 [37:00<47:23, 8.89s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 274/594 [37:00<47:23, 8.89s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▌ | 275/594 [37:09<47:42, 8.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▌ | 275/594 [37:09<47:42, 8.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2307, 'learning_rate': 1.6320000000000003e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████▌ | 275/594 [37:09<47:42, 8.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▌ | 275/594 [37:09<47:42, 8.97s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 276/594 [37:18<46:58, 8.86s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 276/594 [37:18<46:58, 8.86s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1744, 'learning_rate': 1.6380000000000002e-05, 'epoch': 0.46} + 46%|█████████████████████████████████████▋ | 276/594 [37:18<46:58, 8.86s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 276/594 [37:18<46:58, 8.86s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 276/594 [37:18<46:58, 8.86s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 277/594 [37:26<46:25, 8.79s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 277/594 [37:26<46:25, 8.79s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 277/594 [37:26<46:25, 8.79s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 277/594 [37:26<46:25, 8.79s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 278/594 [37:35<45:54, 8.72s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 278/594 [37:35<45:54, 8.72s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0863, 'learning_rate': 1.65e-05, 'epoch': 0.47} + 47%|█████████████████████████████████████▉ | 278/594 [37:35<45:54, 8.72s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 278/594 [37:35<45:54, 8.72s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 278/594 [37:35<45:54, 8.72s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 279/594 [37:43<45:10, 8.61s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 279/594 [37:43<45:10, 8.61s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 279/594 [37:43<45:10, 8.61s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 279/594 [37:43<45:10, 8.61s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 280/594 [37:52<44:47, 8.56s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|███���██████████████████████████████████▏ | 280/594 [37:52<44:47, 8.56s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3489, 'learning_rate': 1.6620000000000004e-05, 'epoch': 0.47} + 47%|██████████████████████████████████████▏ | 280/594 [37:52<44:47, 8.56s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 280/594 [37:52<44:47, 8.56s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 281/594 [38:00<44:12, 8.47s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 281/594 [38:00<44:12, 8.47s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.347, 'learning_rate': 1.6680000000000003e-05, 'epoch': 0.47} + 47%|██████████████████████████████████████▎ | 281/594 [38:00<44:12, 8.47s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 281/594 [38:00<44:12, 8.47s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 282/594 [38:08<43:29, 8.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 282/594 [38:08<43:29, 8.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2189, 'learning_rate': 1.6740000000000002e-05, 'epoch': 0.47} + 47%|██████████████████████████████████████▍ | 282/594 [38:08<43:29, 8.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 282/594 [38:08<43:29, 8.36s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 283/594 [38:16<42:48, 8.26s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 283/594 [38:16<42:48, 8.26s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.13, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▌ | 283/594 [38:16<42:48, 8.26s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 283/594 [38:16<42:48, 8.26s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 284/594 [38:24<42:08, 8.16s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 284/594 [38:24<42:08, 8.16s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1049, 'learning_rate': 1.686e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▋ | 284/594 [38:24<42:08, 8.16s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 284/594 [38:24<42:08, 8.16s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 285/594 [38:32<41:22, 8.03s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 285/594 [38:32<41:22, 8.03s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3792, 'learning_rate': 1.6919999999999997e-05, 'epoch': 0.48} + 48%|██████████████████████████████████████▊ | 285/594 [38:32<41:22, 8.03s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 285/594 [38:32<41:22, 8.03s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 285/594 [38:32<41:22, 8.03s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████ | 286/594 [38:39<40:42, 7.93s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████ | 286/594 [38:39<40:42, 7.93s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████ | 286/594 [38:39<40:42, 7.93s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:15,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:15,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.469, 'learning_rate': 1.704e-05, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:15,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:15,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▎ | 288/594 [38:54<38:53, 7.62s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▎ | 288/594 [38:54<38:53, 7.62s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 3.9562, 'learning_rate': 1.71e-05, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:28,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▍ | 289/594 [39:01<37:45, 7.43s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▍ | 289/594 [39:01<37:45, 7.43s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.209, 'learning_rate': 1.716e-05, 'epoch': 0.49} + 49%|███████████████████████████████████████▍ | 289/594 [39:01<37:45, 7.43s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▍ | 289/594 [39:01<37:45, 7.43s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▍ | 289/594 [39:01<37:45, 7.43s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 290/594 [39:08<36:39, 7.23s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:40,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:40,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▋ | 291/594 [39:14<35:24, 7.01s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▋ | 291/594 [39:14<35:24, 7.01s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:46,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:46,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 292/594 [39:20<33:48, 6.72s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 292/594 [39:20<33:48, 6.72s/it]g-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:52,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:52,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:14:52,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:08:45,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 293/594 [39:26<32:04, 6.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:14:56,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 293/594 [39:26<32:04, 6.39s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:14:56,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:00,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:14:56,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:00,083 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:14:56,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:02,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:14:56,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:02,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:14:56,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:02,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:14:56,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 295/594 [39:36<28:19, 5.68s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:15:06,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:08,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:06,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:08,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:06,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 296/594 [39:40<26:06, 5.26s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:15:10,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:12,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:10,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:12,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:10,270 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 297/594 [39:44<23:49, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:15:13,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 298/594 [39:48<21:30, 4.36s/it]g-point operations will not be computed-01 11:15:13,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 298/594 [39:48<21:30, 4.36s/it]g-point operations will not be computed-01 11:15:13,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:18,434 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:17,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:18,434 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:17,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:20,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:19,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 300/594 [39:53<17:30, 3.57s/it]g-point operations will not be computed-01 11:15:19,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 300/594 [39:53<17:30, 3.57s/it]g-point operations will not be computed-01 11:15:19,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 300/594 [39:53<17:30, 3.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|████████████████████████████████████████▉ | 300/594 [39:53<17:30, 3.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:15:29,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████ | 301/594 [40:04<27:28, 5.63s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████ | 301/594 [40:04<27:28, 5.63s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.284, 'learning_rate': 1.7879999999999998e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████ | 301/594 [40:04<27:28, 5.63s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████ | 301/594 [40:04<27:28, 5.63s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████ | 301/594 [40:04<27:28, 5.63s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 302/594 [40:14<33:55, 6.97s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 302/594 [40:14<33:55, 6.97s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 302/594 [40:14<33:55, 6.97s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 302/594 [40:14<33:55, 6.97s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 302/594 [40:14<33:55, 6.97s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▎ | 303/594 [40:24<38:08, 7.86s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▎ | 303/594 [40:24<38:08, 7.86s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▎ | 303/594 [40:24<38:08, 7.86s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▎ | 303/594 [40:24<38:08, 7.86s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 304/594 [40:33<40:56, 8.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 304/594 [40:33<40:56, 8.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0805, 'learning_rate': 1.806e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▍ | 304/594 [40:33<40:56, 8.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 304/594 [40:33<40:56, 8.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 305/594 [40:43<42:48, 8.89s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 305/594 [40:43<42:48, 8.89s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2797, 'learning_rate': 1.812e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▌ | 305/594 [40:43<42:48, 8.89s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▌ | 305/594 [40:43<42:48, 8.89s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 306/594 [40:53<44:01, 9.17s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 306/594 [40:53<44:01, 9.17s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2722, 'learning_rate': 1.818e-05, 'epoch': 0.51} + 52%|█████████████████████████████████████████▋ | 306/594 [40:53<44:01, 9.17s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 306/594 [40:53<44:01, 9.17s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 306/594 [40:53<44:01, 9.17s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 307/594 [41:03<44:39, 9.34s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 307/594 [41:03<44:39, 9.34s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 307/594 [41:03<44:39, 9.34s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 307/594 [41:03<44:39, 9.34s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 308/594 [41:13<44:58, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 308/594 [41:13<44:58, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2789, 'learning_rate': 1.83e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████ | 308/594 [41:13<44:58, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 308/594 [41:13<44:58, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▏ | 309/594 [41:22<45:05, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▏ | 309/594 [41:22<45:05, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1761, 'learning_rate': 1.836e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████▏ | 309/594 [41:22<45:05, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▏ | 309/594 [41:22<45:05, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 310/594 [41:32<45:00, 9.51s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 310/594 [41:32<45:00, 9.51s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.261, 'learning_rate': 1.842e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████▎ | 310/594 [41:32<45:00, 9.51s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▎ | 310/594 [41:32<45:00, 9.51s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 311/594 [41:41<44:44, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 311/594 [41:41<44:44, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1181, 'learning_rate': 1.848e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████▍ | 311/594 [41:41<44:44, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 311/594 [41:41<44:44, 9.49s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 312/594 [41:51<44:30, 9.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 312/594 [41:51<44:30, 9.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1469, 'learning_rate': 1.854e-05, 'epoch': 0.52} + 53%|██████████████████████████████████████████▌ | 312/594 [41:51<44:30, 9.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 312/594 [41:51<44:30, 9.47s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 313/594 [42:00<44:13, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 313/594 [42:00<44:13, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.187, 'learning_rate': 1.86e-05, 'epoch': 0.53} + 53%|██████████████████████████████████████████▋ | 313/594 [42:00<44:13, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 313/594 [42:00<44:13, 9.44s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 314/594 [42:09<43:44, 9.37s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 314/594 [42:09<43:44, 9.37s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1858, 'learning_rate': 1.866e-05, 'epoch': 0.53} + 53%|██████████████████████████████████████████▊ | 314/594 [42:09<43:44, 9.37s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 314/594 [42:09<43:44, 9.37s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 314/594 [42:09<43:44, 9.37s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 315/594 [42:19<43:32, 9.36s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 315/594 [42:19<43:32, 9.36s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 315/594 [42:19<43:32, 9.36s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 315/594 [42:19<43:32, 9.36s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 315/594 [42:19<43:32, 9.36s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██��████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1419, 'learning_rate': 1.884e-05, 'epoch': 0.53} + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 316/594 [42:28<43:10, 9.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 318/594 [42:46<42:20, 9.20s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 318/594 [42:46<42:20, 9.20s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 318/594 [42:46<42:20, 9.20s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 318/594 [42:46<42:20, 9.20s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 318/594 [42:46<42:20, 9.20s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 319/594 [42:55<41:57, 9.16s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 319/594 [42:55<41:57, 9.16s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 319/594 [42:55<41:57, 9.16s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 319/594 [42:55<41:57, 9.16s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 320/594 [43:04<41:39, 9.12s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 320/594 [43:04<41:39, 9.12s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2325, 'learning_rate': 1.902e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▋ | 320/594 [43:04<41:39, 9.12s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 320/594 [43:04<41:39, 9.12s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 320/594 [43:04<41:39, 9.12s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 321/594 [43:13<41:15, 9.07s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 321/594 [43:13<41:15, 9.07s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 321/594 [43:13<41:15, 9.07s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▊ | 321/594 [43:13<41:15, 9.07s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1089, 'learning_rate': 1.914e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1485, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.209, 'learning_rate': 1.9260000000000002e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 322/594 [43:22<40:53, 9.02s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 325/594 [43:49<40:16, 8.98s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 325/594 [43:49<40:16, 8.98s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 325/594 [43:49<40:16, 8.98s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 325/594 [43:49<40:16, 8.98s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 325/594 [43:49<40:16, 8.98s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 326/594 [43:57<39:38, 8.88s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 326/594 [43:57<39:38, 8.88s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 326/594 [43:57<39:38, 8.88s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 326/594 [43:57<39:38, 8.88s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 326/594 [43:57<39:38, 8.88s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 327/594 [44:06<38:56, 8.75s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 327/594 [44:06<38:56, 8.75s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 327/594 [44:06<38:56, 8.75s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 327/594 [44:06<38:56, 8.75s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▌ | 327/594 [44:06<38:56, 8.75s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 328/594 [44:14<38:22, 8.66s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 328/594 [44:14<38:22, 8.66s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 328/594 [44:14<38:22, 8.66s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:19:51,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:19:51,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0316, 'learning_rate': 1.9560000000000002e-05, 'epoch': 0.55} +[WARNING|modeling_utils.py:388] 2022-03-01 11:19:51,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:19:51,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 330/594 [44:31<37:24, 8.50s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 330/594 [44:31<37:24, 8.50s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2485, 'learning_rate': 1.9620000000000002e-05, 'epoch': 0.55} + 56%|█████████████████████████████████████████████ | 330/594 [44:31<37:24, 8.50s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 330/594 [44:31<37:24, 8.50s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 331/594 [44:39<36:48, 8.40s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 331/594 [44:39<36:48, 8.40s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1414, 'learning_rate': 1.968e-05, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▏ | 331/594 [44:39<36:48, 8.40s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 331/594 [44:39<36:48, 8.40s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▏ | 331/594 [44:39<36:48, 8.40s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 332/594 [44:47<36:20, 8.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 332/594 [44:47<36:20, 8.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 332/594 [44:47<36:20, 8.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 332/594 [44:47<36:20, 8.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 332/594 [44:47<36:20, 8.32s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▍ | 333/594 [44:55<35:42, 8.21s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▍ | 333/594 [44:55<35:42, 8.21s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▍ | 333/594 [44:55<35:42, 8.21s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▍ | 333/594 [44:55<35:42, 8.21s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▍ | 333/594 [44:55<35:42, 8.21s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 334/594 [45:03<35:08, 8.11s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 334/594 [45:03<35:08, 8.11s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 334/594 [45:03<35:08, 8.11s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 334/594 [45:03<35:08, 8.11s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 334/594 [45:03<35:08, 8.11s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 335/594 [45:11<34:35, 8.01s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 335/594 [45:11<34:35, 8.01s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 335/594 [45:11<34:35, 8.01s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 335/594 [45:11<34:35, 8.01s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▋ | 335/594 [45:11<34:35, 8.01s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 336/594 [45:18<33:54, 7.89s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 336/594 [45:18<33:54, 7.89s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:20:52,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:20:52,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 337/594 [45:26<33:20, 7.78s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 337/594 [45:26<33:20, 7.78s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 337/594 [45:26<33:20, 7.78s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 337/594 [45:26<33:20, 7.78s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 337/594 [45:26<33:20, 7.78s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 338/594 [45:33<32:41, 7.66s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 338/594 [45:33<32:41, 7.66s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:07,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 339/594 [45:40<31:35, 7.43s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 339/594 [45:40<31:35, 7.43s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1983, 'learning_rate': 2.016e-05, 'epoch': 0.57} + 57%|██████████████████████████████████████████████▏ | 339/594 [45:40<31:35, 7.43s/it]g-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:15,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:15,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2388, 'learning_rate': 2.0220000000000003e-05, 'epoch': 0.57} +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:15,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:21,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:21,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2476, 'learning_rate': 2.0280000000000002e-05, 'epoch': 0.57} +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:21,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:27,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3317, 'learning_rate': 2.0340000000000002e-05, 'epoch': 0.58} +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:31,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:31,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:15:24,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▊ | 343/594 [46:04<25:36, 6.12s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▊ | 343/594 [46:04<25:36, 6.12s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:37,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:37,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:39,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:41,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:41,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:43,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:43,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:45,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:47,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:47,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:48,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:50,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:50,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:53,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:53,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:54,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:57,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:21:57,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6823, 'learning_rate': 2.082e-05, 'epoch': 0.59} +[WARNING|modeling_utils.py:388] 2022-03-01 11:22:02,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:22:02,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:22:07,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:22:07,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1024, 'learning_rate': 2.088e-05, 'epoch': 0.59} +[WARNING|modeling_utils.py:388] 2022-03-01 11:22:07,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:22:07,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 352/594 [46:49<27:49, 6.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 352/594 [46:49<27:49, 6.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1924, 'learning_rate': 2.094e-05, 'epoch': 0.59} + 59%|████████████████████████████████████████████████ | 352/594 [46:49<27:49, 6.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 352/594 [46:49<27:49, 6.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 353/594 [46:59<31:24, 7.82s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 353/594 [46:59<31:24, 7.82s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2308, 'learning_rate': 2.1e-05, 'epoch': 0.59} + 59%|████████████████████████████████████████████████▏ | 353/594 [46:59<31:24, 7.82s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 353/594 [46:59<31:24, 7.82s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0928, 'learning_rate': 2.1059999999999998e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1424, 'learning_rate': 2.1119999999999998e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 354/594 [47:09<33:39, 8.41s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 356/594 [47:29<36:14, 9.14s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 356/594 [47:29<36:14, 9.14s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1515, 'learning_rate': 2.118e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▌ | 356/594 [47:29<36:14, 9.14s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 356/594 [47:29<36:14, 9.14s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 357/594 [47:39<36:56, 9.35s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 357/594 [47:39<36:56, 9.35s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1723, 'learning_rate': 2.124e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▋ | 357/594 [47:39<36:56, 9.35s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 357/594 [47:39<36:56, 9.35s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 357/594 [47:39<36:56, 9.35s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 358/594 [47:48<37:11, 9.46s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 358/594 [47:48<37:11, 9.46s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 358/594 [47:48<37:11, 9.46s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 358/594 [47:48<37:11, 9.46s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▉ | 359/594 [47:58<37:13, 9.51s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|██████████████████████████████████████████���█████▉ | 359/594 [47:58<37:13, 9.51s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1245, 'learning_rate': 2.136e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▉ | 359/594 [47:58<37:13, 9.51s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▉ | 359/594 [47:58<37:13, 9.51s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 360/594 [48:07<37:12, 9.54s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 360/594 [48:07<37:12, 9.54s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0757, 'learning_rate': 2.1419999999999998e-05, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████ | 360/594 [48:07<37:12, 9.54s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 360/594 [48:07<37:12, 9.54s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 360/594 [48:07<37:12, 9.54s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 361/594 [48:17<36:53, 9.50s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 361/594 [48:17<36:53, 9.50s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 361/594 [48:17<36:53, 9.50s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▏ | 361/594 [48:17<36:53, 9.50s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 362/594 [48:26<36:33, 9.45s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 362/594 [48:26<36:33, 9.45s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1548, 'learning_rate': 2.154e-05, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▎ | 362/594 [48:26<36:33, 9.45s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 362/594 [48:26<36:33, 9.45s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▌ | 363/594 [48:36<36:17, 9.43s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▌ | 363/594 [48:36<36:17, 9.43s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.221, 'learning_rate': 2.16e-05, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▌ | 363/594 [48:36<36:17, 9.43s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▌ | 363/594 [48:36<36:17, 9.43s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 364/594 [48:45<35:59, 9.39s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 364/594 [48:45<35:59, 9.39s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0905, 'learning_rate': 2.166e-05, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▋ | 364/594 [48:45<35:59, 9.39s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 364/594 [48:45<35:59, 9.39s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▊ | 365/594 [48:54<35:39, 9.34s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▊ | 365/594 [48:54<35:39, 9.34s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1585, 'learning_rate': 2.172e-05, 'epoch': 0.61} + 61%|█████████████████████████████████��███████████████▊ | 365/594 [48:54<35:39, 9.34s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▊ | 365/594 [48:54<35:39, 9.34s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 366/594 [49:03<35:19, 9.29s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 366/594 [49:03<35:19, 9.29s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1137, 'learning_rate': 2.178e-05, 'epoch': 0.62} + 62%|█████████████████████████████████████████████████▉ | 366/594 [49:03<35:19, 9.29s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 366/594 [49:03<35:19, 9.29s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 367/594 [49:12<34:55, 9.23s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 367/594 [49:12<34:55, 9.23s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1436, 'learning_rate': 2.184e-05, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████ | 367/594 [49:12<34:55, 9.23s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 367/594 [49:12<34:55, 9.23s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0909, 'learning_rate': 2.19e-05, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████��███████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2206, 'learning_rate': 2.196e-05, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 368/594 [49:21<34:32, 9.17s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 370/594 [49:39<33:57, 9.10s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 370/594 [49:39<33:57, 9.10s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1105, 'learning_rate': 2.202e-05, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████▍ | 370/594 [49:39<33:57, 9.10s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 370/594 [49:39<33:57, 9.10s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 370/594 [49:39<33:57, 9.10s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▌ | 371/594 [49:48<33:39, 9.06s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1716, 'learning_rate': 2.214e-05, 'epoch': 0.63} +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:25:21,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 373/594 [50:06<33:06, 8.99s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 373/594 [50:06<33:06, 8.99s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 373/594 [50:06<33:06, 8.99s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 373/594 [50:06<33:06, 8.99s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▊ | 373/594 [50:06<33:06, 8.99s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 374/594 [50:15<32:53, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 374/594 [50:15<32:53, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 374/594 [50:15<32:53, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 374/594 [50:15<32:53, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 374/594 [50:15<32:53, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 375/594 [50:25<33:04, 9.06s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 375/594 [50:25<33:04, 9.06s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 375/594 [50:25<33:04, 9.06s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 375/594 [50:25<33:04, 9.06s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▏ | 375/594 [50:25<33:04, 9.06s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 376/594 [50:33<32:36, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 376/594 [50:33<32:36, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 376/594 [50:33<32:36, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 376/594 [50:33<32:36, 8.97s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 377/594 [50:42<32:12, 8.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 377/594 [50:42<32:12, 8.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1806, 'learning_rate': 2.2440000000000002e-05, 'epoch': 0.63} + 63%|███████████████████████████████████████████████████▍ | 377/594 [50:42<32:12, 8.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 377/594 [50:42<32:12, 8.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▍ | 377/594 [50:42<32:12, 8.90s/it]g-point operations will not be computed-01 11:21:33,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 378/594 [50:51<31:49, 8.84s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 378/594 [50:51<31:49, 8.84s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▌ | 378/594 [50:51<31:49, 8.84s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 379/594 [50:59<31:20, 8.75s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 379/594 [50:59<31:20, 8.75s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0377, 'learning_rate': 2.256e-05, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▋ | 379/594 [50:59<31:20, 8.75s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 379/594 [50:59<31:20, 8.75s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 379/594 [50:59<31:20, 8.75s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:21,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▊ | 380/594 [51:08<30:54, 8.66s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▊ | 380/594 [51:08<30:54, 8.66s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▊ | 380/594 [51:08<30:54, 8.66s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▊ | 380/594 [51:08<30:54, 8.66s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 381/594 [51:16<30:21, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 381/594 [51:16<30:21, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 381/594 [51:16<30:21, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 381/594 [51:16<30:21, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 381/594 [51:16<30:21, 8.55s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 382/594 [51:24<29:46, 8.43s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 382/594 [51:24<29:46, 8.43s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 382/594 [51:24<29:46, 8.43s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 382/594 [51:24<29:46, 8.43s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 382/594 [51:24<29:46, 8.43s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 383/594 [51:32<29:15, 8.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 383/594 [51:32<29:15, 8.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 383/594 [51:32<29:15, 8.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 383/594 [51:32<29:15, 8.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████▏ | 383/594 [51:32<29:15, 8.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 384/594 [51:40<28:38, 8.18s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 384/594 [51:40<28:38, 8.18s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 384/594 [51:40<28:38, 8.18s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 384/594 [51:40<28:38, 8.18s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 384/594 [51:40<28:38, 8.18s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 385/594 [51:48<28:03, 8.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 385/594 [51:48<28:03, 8.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 385/594 [51:48<28:03, 8.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 385/594 [51:48<28:03, 8.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 385/594 [51:48<28:03, 8.05s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|███████████████████████████████████████████████��████▋ | 386/594 [51:55<27:28, 7.93s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 386/594 [51:55<27:28, 7.93s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:27:29,819 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 387/594 [52:03<26:50, 7.78s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 387/594 [52:03<26:50, 7.78s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2782, 'learning_rate': 2.304e-05, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▊ | 387/594 [52:03<26:50, 7.78s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 387/594 [52:03<26:50, 7.78s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 387/594 [52:03<26:50, 7.78s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 388/594 [52:10<26:11, 7.63s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 388/594 [52:10<26:11, 7.63s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 388/594 [52:10<26:11, 7.63s/it]g-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:27:46,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:27:46,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0981, 'learning_rate': 2.3160000000000002e-05, 'epoch': 0.65} +[WARNING|modeling_utils.py:388] 2022-03-01 11:27:46,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:27:46,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:27:46,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:26:38,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 390/594 [52:24<24:48, 7.29s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 390/594 [52:24<24:48, 7.29s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 390/594 [52:24<24:48, 7.29s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 390/594 [52:24<24:48, 7.29s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 391/594 [52:31<23:45, 7.02s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:02,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:02,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:02,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 392/594 [52:37<22:39, 6.73s/it]g-point operations will not be computed-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:08,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:08,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:08,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:27:54,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 393/594 [52:42<21:20, 6.37s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:14,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:14,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 394/594 [52:47<19:56, 5.98s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:18,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:20,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:20,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:22,707 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:24,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:24,656 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:26,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:28,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:28,378 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:30,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:30,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:33,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:34,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:34,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:35,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:35,769 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:37,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:37,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:42,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:42,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:28:42,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▋ | 401/594 [53:19<18:02, 5.61s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▋ | 401/594 [53:19<18:02, 5.61s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▋ | 401/594 [53:19<18:02, 5.61s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▋ | 401/594 [53:19<18:02, 5.61s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▋ | 401/594 [53:19<18:02, 5.61s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 402/594 [53:29<22:17, 6.97s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 402/594 [53:29<22:17, 6.97s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 402/594 [53:29<22:17, 6.97s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 402/594 [53:29<22:17, 6.97s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████��███▉ | 403/594 [53:39<25:02, 7.86s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▉ | 403/594 [53:39<25:02, 7.86s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.164, 'learning_rate': 2.4e-05, 'epoch': 0.68} + 68%|██████████████████████████████████████████████████████▉ | 403/594 [53:39<25:02, 7.86s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▉ | 403/594 [53:39<25:02, 7.86s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 404/594 [53:49<26:46, 8.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 404/594 [53:49<26:46, 8.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1493, 'learning_rate': 2.4060000000000003e-05, 'epoch': 0.68} + 68%|███████████████████████████████████████████████████████ | 404/594 [53:49<26:46, 8.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████ | 404/594 [53:49<26:46, 8.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 405/594 [53:59<27:53, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 405/594 [53:59<27:53, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0781, 'learning_rate': 2.4120000000000003e-05, 'epoch': 0.68} + 68%|███████████████████████████████████████████████████████▏ | 405/594 [53:59<27:53, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 405/594 [53:59<27:53, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 405/594 [53:59<27:53, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 406/594 [54:08<28:36, 9.13s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 406/594 [54:08<28:36, 9.13s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 406/594 [54:08<28:36, 9.13s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 406/594 [54:08<28:36, 9.13s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▌ | 407/594 [54:18<28:59, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▌ | 407/594 [54:18<28:59, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1224, 'learning_rate': 2.4240000000000002e-05, 'epoch': 0.68} + 69%|███████████████████████████████████████████████████████▌ | 407/594 [54:18<28:59, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▌ | 407/594 [54:18<28:59, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▌ | 407/594 [54:18<28:59, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 408/594 [54:28<29:09, 9.41s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 408/594 [54:28<29:09, 9.41s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 408/594 [54:28<29:09, 9.41s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 408/594 [54:28<29:09, 9.41s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 409/594 [54:37<29:08, 9.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 409/594 [54:37<29:08, 9.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1111, 'learning_rate': 2.4360000000000004e-05, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▊ | 409/594 [54:37<29:08, 9.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 409/594 [54:37<29:08, 9.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 410/594 [54:47<29:00, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 410/594 [54:47<29:00, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0723, 'learning_rate': 2.442e-05, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▉ | 410/594 [54:47<29:00, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 410/594 [54:47<29:00, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 410/594 [54:47<29:00, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 411/594 [54:56<28:53, 9.47s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 411/594 [54:56<28:53, 9.47s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 411/594 [54:56<28:53, 9.47s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 411/594 [54:56<28:53, 9.47s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 412/594 [55:06<28:41, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 412/594 [55:06<28:41, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2666, 'learning_rate': 2.454e-05, 'epoch': 0.69} + 69%|████████████████████████████████████████████████████████▏ | 412/594 [55:06<28:41, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 412/594 [55:06<28:41, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 412/594 [55:06<28:41, 9.46s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 413/594 [55:15<28:28, 9.44s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 413/594 [55:15<28:28, 9.44s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 413/594 [55:15<28:28, 9.44s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▎ | 413/594 [55:15<28:28, 9.44s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 414/594 [55:24<28:03, 9.35s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 414/594 [55:24<28:03, 9.35s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1745, 'learning_rate': 2.4659999999999998e-05, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▍ | 414/594 [55:24<28:03, 9.35s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|██████████��█████████████████████████████████████████████▍ | 414/594 [55:24<28:03, 9.35s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 414/594 [55:24<28:03, 9.35s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▌ | 415/594 [55:34<27:46, 9.31s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▌ | 415/594 [55:34<27:46, 9.31s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▌ | 415/594 [55:34<27:46, 9.31s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▌ | 415/594 [55:34<27:46, 9.31s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 416/594 [55:43<27:35, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 416/594 [55:43<27:35, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.08, 'learning_rate': 2.478e-05, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▋ | 416/594 [55:43<27:35, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 416/594 [55:43<27:35, 9.30s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 417/594 [55:52<27:17, 9.25s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 417/594 [55:52<27:17, 9.25s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1442, 'learning_rate': 2.484e-05, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▊ | 417/594 [55:52<27:17, 9.25s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 417/594 [55:52<27:17, 9.25s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 418/594 [56:01<27:02, 9.22s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 418/594 [56:01<27:02, 9.22s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 3.9723, 'learning_rate': 2.49e-05, 'epoch': 0.7} + 70%|█████████████████████████████████████████████████████████ | 418/594 [56:01<27:02, 9.22s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 418/594 [56:01<27:02, 9.22s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 419/594 [56:10<26:45, 9.17s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 419/594 [56:10<26:45, 9.17s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2404, 'learning_rate': 2.4959999999999998e-05, 'epoch': 0.7} + 71%|█████████████████████████████████████████████████████████▏ | 419/594 [56:10<26:45, 9.17s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 419/594 [56:10<26:45, 9.17s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 419/594 [56:10<26:45, 9.17s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 420/594 [56:19<26:30, 9.14s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 420/594 [56:19<26:30, 9.14s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|████████████████████████████████████████████���████████████▎ | 420/594 [56:19<26:30, 9.14s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 420/594 [56:19<26:30, 9.14s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 420/594 [56:19<26:30, 9.14s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 421/594 [56:28<26:06, 9.05s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 421/594 [56:28<26:06, 9.05s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 421/594 [56:28<26:06, 9.05s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 421/594 [56:28<26:06, 9.05s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▍ | 421/594 [56:28<26:06, 9.05s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 422/594 [56:37<25:49, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 422/594 [56:37<25:49, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 422/594 [56:37<25:49, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 422/594 [56:37<25:49, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 422/594 [56:37<25:49, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|██████████████████████████████��██████████████████████████▋ | 423/594 [56:46<25:31, 8.95s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 423/594 [56:46<25:31, 8.95s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 423/594 [56:46<25:31, 8.95s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 423/594 [56:46<25:31, 8.95s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 423/594 [56:46<25:31, 8.95s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 424/594 [56:55<25:14, 8.91s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 424/594 [56:55<25:14, 8.91s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 424/594 [56:55<25:14, 8.91s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▊ | 424/594 [56:55<25:14, 8.91s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 425/594 [57:04<25:22, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|█████████████████████████████████████████████████████████▉ | 425/594 [57:04<25:22, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1346, 'learning_rate': 2.5319999999999998e-05, 'epoch': 0.71} + 72%|█████████████████████████████████████████████████████████▉ | 425/594 [57:04<25:22, 9.01s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:32:41,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:32:41,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1944, 'learning_rate': 2.538e-05, 'epoch': 0.72} +[WARNING|modeling_utils.py:388] 2022-03-01 11:32:41,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:32:41,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▏ | 427/594 [57:21<24:37, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▏ | 427/594 [57:21<24:37, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1165, 'learning_rate': 2.544e-05, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████▏ | 427/594 [57:21<24:37, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▏ | 427/594 [57:21<24:37, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▏ | 427/594 [57:21<24:37, 8.85s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 428/594 [57:30<24:10, 8.74s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 428/594 [57:30<24:10, 8.74s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 428/594 [57:30<24:10, 8.74s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 428/594 [57:30<24:10, 8.74s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 428/594 [57:30<24:10, 8.74s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 429/594 [57:38<23:45, 8.64s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 429/594 [57:38<23:45, 8.64s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 429/594 [57:38<23:45, 8.64s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▌ | 429/594 [57:38<23:45, 8.64s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 430/594 [57:47<23:27, 8.58s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 430/594 [57:47<23:27, 8.58s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.131, 'learning_rate': 2.562e-05, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████▋ | 430/594 [57:47<23:27, 8.58s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 430/594 [57:47<23:27, 8.58s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▊ | 431/594 [57:55<23:04, 8.50s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▊ | 431/594 [57:55<23:04, 8.50s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2406, 'learning_rate': 2.568e-05, 'epoch': 0.72} + 73%|██████████████████████████████████████████████████████████▊ | 431/594 [57:55<23:04, 8.50s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▊ | 431/594 [57:55<23:04, 8.50s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 432/594 [58:03<22:43, 8.42s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████���██████████████████████████████████▉ | 432/594 [58:03<22:43, 8.42s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0029, 'learning_rate': 2.574e-05, 'epoch': 0.73} + 73%|██████████████████████████████████████████████████████████▉ | 432/594 [58:03<22:43, 8.42s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 432/594 [58:03<22:43, 8.42s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████ | 433/594 [58:11<22:21, 8.33s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████ | 433/594 [58:11<22:21, 8.33s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2316, 'learning_rate': 2.58e-05, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████ | 433/594 [58:11<22:21, 8.33s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████ | 433/594 [58:11<22:21, 8.33s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 434/594 [58:19<21:58, 8.24s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 434/594 [58:19<21:58, 8.24s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1736, 'learning_rate': 2.586e-05, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▏ | 434/594 [58:19<21:58, 8.24s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 434/594 [58:19<21:58, 8.24s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 435/594 [58:27<21:31, 8.12s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 435/594 [58:27<21:31, 8.12s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.216, 'learning_rate': 2.592e-05, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▎ | 435/594 [58:27<21:31, 8.12s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 435/594 [58:27<21:31, 8.12s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 436/594 [58:35<21:04, 8.00s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 436/594 [58:35<21:04, 8.00s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1108, 'learning_rate': 2.5980000000000002e-05, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▍ | 436/594 [58:35<21:04, 8.00s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:11,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:11,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0038, 'learning_rate': 2.604e-05, 'epoch': 0.73} +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:14,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:14,925 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 438/594 [58:50<20:04, 7.72s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 438/594 [58:50<20:04, 7.72s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2346, 'learning_rate': 2.61e-05, 'epoch': 0.74} + 74%|███████████████████████████████████████████████████████████▋ | 438/594 [58:50<20:04, 7.72s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 438/594 [58:50<20:04, 7.72s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 438/594 [58:50<20:04, 7.72s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▊ | 439/594 [58:57<19:31, 7.56s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▊ | 439/594 [58:57<19:31, 7.56s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:31,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 440/594 [59:04<18:57, 7.38s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 440/594 [59:04<18:57, 7.38s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3308, 'learning_rate': 2.622e-05, 'epoch': 0.74} + 74%|████████████████████████████████████████████████████████████ | 440/594 [59:04<18:57, 7.38s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:39,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:39,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1971, 'learning_rate': 2.628e-05, 'epoch': 0.74} +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:39,284 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:45,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:45,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3519, 'learning_rate': 2.6340000000000002e-05, 'epoch': 0.74} +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:49,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▍ | 443/594 [59:22<16:13, 6.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|█████████████████████████████��██████████████████████████████▍ | 443/594 [59:22<16:13, 6.45s/it]g-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:53,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:53,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:28:12,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▌ | 444/594 [59:27<15:08, 6.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▌ | 444/594 [59:27<15:08, 6.06s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:34:59,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 445/594 [59:32<14:04, 5.67s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▋ | 445/594 [59:32<14:04, 5.67s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:03,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:05,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:05,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:07,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:08,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:08,967 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:10,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:13,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:13,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:15,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:15,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:18,040 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:18,040 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3646, 'learning_rate': 2.682e-05, 'epoch': 0.76} +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:18,040 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:23,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:35:23,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▉ | 451/594 [1:00:00<13:40, 5.74s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▉ | 451/594 [1:00:00<13:40, 5.74s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1923, 'learning_rate': 2.688e-05, 'epoch': 0.76} + 76%|███████████████████████████████████████████████████████████▉ | 451/594 [1:00:00<13:40, 5.74s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|███████████████████████████████████████████████████████████▉ | 451/594 [1:00:00<13:40, 5.74s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1942, 'learning_rate': 2.6940000000000003e-05, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1208, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 452/594 [1:00:10<16:40, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 454/594 [1:00:30<19:51, 8.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 454/594 [1:00:30<19:51, 8.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0062, 'learning_rate': 2.7060000000000002e-05, 'epoch': 0.76} +[WARNING|modeling_utils.py:388] 2022-03-01 11:36:06,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:36:06,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 455/594 [1:00:40<20:39, 8.92s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 455/594 [1:00:40<20:39, 8.92s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 455/594 [1:00:40<20:39, 8.92s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 455/594 [1:00:40<20:39, 8.92s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▌ | 455/594 [1:00:40<20:39, 8.92s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 456/594 [1:00:50<21:07, 9.18s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 456/594 [1:00:50<21:07, 9.18s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 456/594 [1:00:50<21:07, 9.18s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 456/594 [1:00:50<21:07, 9.18s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▊ | 457/594 [1:00:59<21:19, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▊ | 457/594 [1:00:59<21:19, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2131, 'learning_rate': 2.724e-05, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▊ | 457/594 [1:00:59<21:19, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▊ | 457/594 [1:00:59<21:19, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 458/594 [1:01:09<21:25, 9.46s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 458/594 [1:01:09<21:25, 9.46s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2255, 'learning_rate': 2.7300000000000003e-05, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▉ | 458/594 [1:01:09<21:25, 9.46s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 458/594 [1:01:09<21:25, 9.46s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 458/594 [1:01:09<21:25, 9.46s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████ | 459/594 [1:01:19<21:24, 9.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████ | 459/594 [1:01:19<21:24, 9.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████ | 459/594 [1:01:19<21:24, 9.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████ | 459/594 [1:01:19<21:24, 9.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 460/594 [1:01:28<21:17, 9.53s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 460/594 [1:01:28<21:17, 9.53s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.16, 'learning_rate': 2.7420000000000002e-05, 'epoch': 0.77} + 77%|█████████████████████████████████████████████████████████████▏ | 460/594 [1:01:28<21:17, 9.53s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 460/594 [1:01:28<21:17, 9.53s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 460/594 [1:01:28<21:17, 9.53s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▎ | 461/594 [1:01:38<21:09, 9.55s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▎ | 461/594 [1:01:38<21:09, 9.55s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▎ | 461/594 [1:01:38<21:09, 9.55s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▎ | 461/594 [1:01:38<21:09, 9.55s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 462/594 [1:01:47<20:56, 9.52s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 462/594 [1:01:47<20:56, 9.52s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.081, 'learning_rate': 2.754e-05, 'epoch': 0.78} + 78%|█████████████████████████████████████████████████████████████▍ | 462/594 [1:01:47<20:56, 9.52s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 462/594 [1:01:47<20:56, 9.52s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 462/594 [1:01:47<20:56, 9.52s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 463/594 [1:01:57<20:40, 9.47s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 463/594 [1:01:57<20:40, 9.47s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 463/594 [1:01:57<20:40, 9.47s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▌ | 463/594 [1:01:57<20:40, 9.47s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 464/594 [1:02:06<20:27, 9.44s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|██████████████████████████████████���██████████████████████████▋ | 464/594 [1:02:06<20:27, 9.44s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1737, 'learning_rate': 2.7660000000000003e-05, 'epoch': 0.78} + 78%|█████████████████████████████████████████████████████████████▋ | 464/594 [1:02:06<20:27, 9.44s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 464/594 [1:02:06<20:27, 9.44s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 464/594 [1:02:06<20:27, 9.44s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1173, 'learning_rate': 2.778e-05, 'epoch': 0.78} + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▊ | 465/594 [1:02:15<20:12, 9.40s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████ | 467/594 [1:02:34<19:46, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████ | 467/594 [1:02:34<19:46, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████ | 467/594 [1:02:34<19:46, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████ | 467/594 [1:02:34<19:46, 9.34s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 468/594 [1:02:43<19:32, 9.31s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 468/594 [1:02:43<19:32, 9.31s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1334, 'learning_rate': 2.79e-05, 'epoch': 0.79} +[WARNING|modeling_utils.py:388] 2022-03-01 11:38:18,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▍ | 469/594 [1:02:52<19:19, 9.27s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▍ | 469/594 [1:02:52<19:19, 9.27s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1245, 'learning_rate': 2.7960000000000003e-05, 'epoch': 0.79} + 79%|██████████████████████████████████████████████████████████████▍ | 469/594 [1:02:52<19:19, 9.27s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▍ | 469/594 [1:02:52<19:19, 9.27s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 470/594 [1:03:01<18:58, 9.19s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|███████████████████��██████████████████████████████████████████▌ | 470/594 [1:03:01<18:58, 9.19s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1056, 'learning_rate': 2.8020000000000003e-05, 'epoch': 0.79} + 79%|██████████████████████████████████████████████████████████████▌ | 470/594 [1:03:01<18:58, 9.19s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 470/594 [1:03:01<18:58, 9.19s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▋ | 471/594 [1:03:10<18:42, 9.13s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▋ | 471/594 [1:03:10<18:42, 9.13s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:38:43,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:38:43,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 472/594 [1:03:19<18:32, 9.12s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 472/594 [1:03:19<18:32, 9.12s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1275, 'learning_rate': 2.8139999999999998e-05, 'epoch': 0.79} + 79%|██████████████████████████████████████████████████████████████▊ | 472/594 [1:03:19<18:32, 9.12s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 472/594 [1:03:19<18:32, 9.12s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▉ | 473/594 [1:03:28<18:16, 9.06s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▉ | 473/594 [1:03:28<18:16, 9.06s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.126, 'learning_rate': 2.8199999999999998e-05, 'epoch': 0.8} + 80%|██████████████████████████████████████████████████████████████▉ | 473/594 [1:03:28<18:16, 9.06s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▉ | 473/594 [1:03:28<18:16, 9.06s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████████████▉ | 473/594 [1:03:28<18:16, 9.06s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 474/594 [1:03:37<18:00, 9.00s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 474/594 [1:03:37<18:00, 9.00s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 474/594 [1:03:37<18:00, 9.00s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 474/594 [1:03:37<18:00, 9.00s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 474/594 [1:03:37<18:00, 9.00s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▏ | 475/594 [1:03:47<18:04, 9.11s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▏ | 475/594 [1:03:47<18:04, 9.11s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▏ | 475/594 [1:03:47<18:04, 9.11s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▏ | 475/594 [1:03:47<18:04, 9.11s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|█████████████████████████████████████████████████���█████████████▎ | 476/594 [1:03:55<17:45, 9.03s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 476/594 [1:03:55<17:45, 9.03s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0925, 'learning_rate': 2.838e-05, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▎ | 476/594 [1:03:55<17:45, 9.03s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 476/594 [1:03:55<17:45, 9.03s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▍ | 477/594 [1:04:04<17:20, 8.89s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▍ | 477/594 [1:04:04<17:20, 8.89s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1197, 'learning_rate': 2.844e-05, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▍ | 477/594 [1:04:04<17:20, 8.89s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▍ | 477/594 [1:04:04<17:20, 8.89s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 478/594 [1:04:12<16:57, 8.77s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 478/594 [1:04:12<16:57, 8.77s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1422, 'learning_rate': 2.8499999999999998e-05, 'epoch': 0.8} + 80%|███████████████████████████████████████████████████████████████▌ | 478/594 [1:04:12<16:57, 8.77s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 478/594 [1:04:12<16:57, 8.77s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████��███████▋ | 479/594 [1:04:21<16:40, 8.70s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▋ | 479/594 [1:04:21<16:40, 8.70s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0224, 'learning_rate': 2.856e-05, 'epoch': 0.81} + 81%|███████████████████████████████████████████████████████████████▋ | 479/594 [1:04:21<16:40, 8.70s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▋ | 479/594 [1:04:21<16:40, 8.70s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 480/594 [1:04:29<16:22, 8.62s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 480/594 [1:04:29<16:22, 8.62s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0076, 'learning_rate': 2.862e-05, 'epoch': 0.81} + 81%|███████████████████████████████████████████████████████████████▊ | 480/594 [1:04:29<16:22, 8.62s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 480/594 [1:04:29<16:22, 8.62s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 481/594 [1:04:38<16:01, 8.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 481/594 [1:04:38<16:01, 8.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2809, 'learning_rate': 2.868e-05, 'epoch': 0.81} + 81%|███████████████████████████████████████████████████████████████▉ | 481/594 [1:04:38<16:01, 8.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 481/594 [1:04:38<16:01, 8.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▉ | 481/594 [1:04:38<16:01, 8.51s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 482/594 [1:04:46<15:39, 8.39s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 482/594 [1:04:46<15:39, 8.39s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 482/594 [1:04:46<15:39, 8.39s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 482/594 [1:04:46<15:39, 8.39s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 482/594 [1:04:46<15:39, 8.39s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▏ | 483/594 [1:04:54<15:25, 8.33s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▏ | 483/594 [1:04:54<15:25, 8.33s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▏ | 483/594 [1:04:54<15:25, 8.33s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▏ | 483/594 [1:04:54<15:25, 8.33s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▏ | 483/594 [1:04:54<15:25, 8.33s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 484/594 [1:05:02<15:04, 8.22s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 484/594 [1:05:02<15:04, 8.22s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 484/594 [1:05:02<15:04, 8.22s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 484/594 [1:05:02<15:04, 8.22s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 484/594 [1:05:02<15:04, 8.22s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 485/594 [1:05:10<14:41, 8.08s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 485/594 [1:05:10<14:41, 8.08s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 485/594 [1:05:10<14:41, 8.08s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 485/594 [1:05:10<14:41, 8.08s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▌ | 485/594 [1:05:10<14:41, 8.08s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 486/594 [1:05:17<14:18, 7.95s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 486/594 [1:05:17<14:18, 7.95s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 486/594 [1:05:17<14:18, 7.95s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:40:53,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:40:53,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2235, 'learning_rate': 2.904e-05, 'epoch': 0.82} +[WARNING|modeling_utils.py:388] 2022-03-01 11:40:53,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:40:53,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:40:53,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 488/594 [1:05:32<13:32, 7.66s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 488/594 [1:05:32<13:32, 7.66s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 488/594 [1:05:32<13:32, 7.66s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:07,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:07,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1539, 'learning_rate': 2.916e-05, 'epoch': 0.82} +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:07,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:07,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:07,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|█████████████████████████████████████████████████████████████████▏ | 490/594 [1:05:46<12:36, 7.28s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:18,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:18,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:18,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▎ | 491/594 [1:05:52<12:04, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▎ | 491/594 [1:05:52<12:04, 7.04s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:25,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:25,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 492/594 [1:05:59<11:31, 6.78s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 492/594 [1:05:59<11:31, 6.78s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:31,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:31,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▌ | 493/594 [1:06:05<10:59, 6.53s/it]g-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:36,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:36,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:36,207 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:34:57,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 494/594 [1:06:10<10:19, 6.19s/it][WARNING|modeling_utils.py:388] 2022-03-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:42,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:42,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▊ | 495/594 [1:06:15<09:35, 5.81s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:46,045 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:48,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:48,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:50,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:50,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:51,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:53,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:53,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:56,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:58,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:41:58,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:42:00,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 11:42:00,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2369] 2022-03-01 11:42:01,079 >> Batch size = 12luation *****e number of tokens of the input, floating-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 0/221 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 2/221 [00:02<04:30, 1.23s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█▏ | 3/221 [00:05<07:18, 2.01s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 4/221 [00:08<08:48, 2.44s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 5/221 [00:11<09:30, 2.64s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▎ | 6/221 [00:15<10:12, 2.85s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 7/221 [00:18<11:08, 3.12s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 8/221 [00:21<10:49, 3.05s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 9/221 [00:24<10:41, 3.02s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 10/221 [00:28<11:35, 3.30s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 11/221 [00:32<12:33, 3.59s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▍ | 12/221 [00:35<11:46, 3.38s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 13/221 [00:38<11:25, 3.29s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████▏ | 14/221 [00:42<11:30, 3.34s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 15/221 [00:47<13:04, 3.81s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 16/221 [00:51<13:56, 4.08s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 17/221 [00:55<13:14, 3.89s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 18/221 [00:58<12:59, 3.84s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 19/221 [01:02<12:19, 3.66s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▍ | 20/221 [01:05<11:48, 3.52s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 21/221 [01:08<11:04, 3.32s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 22/221 [01:11<10:49, 3.26s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▌ | 23/221 [01:14<10:31, 3.19s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 24/221 [01:18<11:03, 3.37s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▎ | 25/221 [01:22<11:35, 3.55s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 26/221 [01:26<11:51, 3.65s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|██████████ | 27/221 [01:28<10:49, 3.35s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 28/221 [01:32<11:20, 3.52s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 29/221 [01:37<12:06, 3.78s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 30/221 [01:40<11:19, 3.56s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▌ | 31/221 [01:42<10:16, 3.25s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▊ | 32/221 [01:45<10:11, 3.23s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 33/221 [01:49<10:38, 3.40s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▌ | 34/221 [01:53<10:41, 3.43s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 35/221 [01:56<10:13, 3.30s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▎ | 36/221 [01:59<10:09, 3.29s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▋ | 37/221 [02:03<10:57, 3.58s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|██████████████ | 38/221 [02:06<10:22, 3.40s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▍ | 39/221 [02:10<10:36, 3.50s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▊ | 40/221 [02:13<10:03, 3.33s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 41/221 [02:16<10:16, 3.43s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▌ | 42/221 [02:21<11:13, 3.76s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▉ | 43/221 [02:24<10:40, 3.60s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 44/221 [02:29<11:43, 3.97s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▋ | 45/221 [02:34<12:13, 4.17s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 46/221 [02:38<12:03, 4.14s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████▍ | 47/221 [02:42<11:48, 4.07s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 48/221 [02:46<11:43, 4.06s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|██████████████████▏ | 49/221 [02:49<11:19, 3.95s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 50/221 [02:53<11:19, 3.97s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▉ | 51/221 [02:57<10:38, 3.76s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▎ | 52/221 [03:00<09:55, 3.53s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▋ | 53/221 [03:02<09:20, 3.34s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|████████████████████ | 54/221 [03:06<09:35, 3.44s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 55/221 [03:10<09:40, 3.50s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▊ | 56/221 [03:14<10:26, 3.80s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████▏ | 57/221 [03:18<10:29, 3.84s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████▌ | 58/221 [03:22<10:04, 3.71s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▉ | 59/221 [03:25<09:34, 3.55s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|██████████████████████▎ | 60/221 [03:27<08:44, 3.26s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 61/221 [03:31<08:58, 3.37s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████ | 62/221 [03:34<08:44, 3.30s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 63/221 [03:38<08:50, 3.36s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 64/221 [03:41<08:47, 3.36s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|████████████████████████ | 65/221 [03:44<08:41, 3.34s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▍ | 66/221 [03:48<08:47, 3.40s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 67/221 [03:51<08:13, 3.21s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 68/221 [03:55<08:53, 3.49s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 69/221 [03:58<08:31, 3.37s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 70/221 [04:01<08:25, 3.34s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▎ | 71/221 [04:04<08:13, 3.29s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 72/221 [04:07<07:45, 3.12s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 73/221 [04:11<08:04, 3.28s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 74/221 [04:14<08:00, 3.27s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 75/221 [04:17<07:52, 3.24s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|████████████████████████████▏ | 76/221 [04:20<07:44, 3.21s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▌ | 77/221 [04:23<07:40, 3.20s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▉ | 78/221 [04:27<07:50, 3.29s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▎ | 79/221 [04:30<07:35, 3.21s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▋ | 80/221 [04:33<07:35, 3.23s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████ | 81/221 [04:37<07:56, 3.41s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▍ | 82/221 [04:41<08:27, 3.65s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 83/221 [04:46<08:51, 3.85s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████▏ | 84/221 [04:50<08:53, 3.89s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████▌ | 85/221 [04:54<09:13, 4.07s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 86/221 [04:58<08:55, 3.97s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|████████████████████████████████▎ | 87/221 [05:02<09:13, 4.13s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 88/221 [05:06<08:39, 3.90s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|█████████████████████████████████ | 89/221 [05:09<08:08, 3.70s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 90/221 [05:13<08:06, 3.71s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▊ | 91/221 [05:17<08:19, 3.84s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▏ | 92/221 [05:21<08:33, 3.98s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▌ | 93/221 [05:25<08:38, 4.05s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 94/221 [05:29<08:25, 3.98s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 95/221 [05:33<08:22, 3.99s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▌ | 96/221 [05:37<08:11, 3.93s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▉ | 97/221 [05:41<08:22, 4.05s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████▎ | 98/221 [05:45<08:08, 3.97s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 99/221 [05:48<07:24, 3.65s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 100/221 [05:52<07:23, 3.67s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 101/221 [05:55<07:03, 3.53s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▍ | 102/221 [05:58<06:41, 3.37s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▊ | 103/221 [06:02<06:55, 3.52s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████ | 104/221 [06:06<07:04, 3.63s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▍ | 105/221 [06:10<07:25, 3.84s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▊ | 106/221 [06:14<07:25, 3.87s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 107/221 [06:17<06:54, 3.64s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 108/221 [06:21<07:10, 3.81s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▉ | 109/221 [06:25<07:10, 3.84s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▎ | 110/221 [06:28<06:48, 3.68s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▋ | 111/221 [06:32<06:30, 3.55s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████ | 112/221 [06:35<06:34, 3.62s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 113/221 [06:39<06:31, 3.63s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 114/221 [06:42<06:16, 3.52s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████��████████████████████████████▏ | 115/221 [06:46<06:11, 3.51s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▌ | 116/221 [06:49<05:56, 3.40s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▉ | 117/221 [06:52<05:51, 3.38s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████▏ | 118/221 [06:56<05:59, 3.49s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▌ | 119/221 [07:00<06:21, 3.74s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 120/221 [07:05<06:33, 3.90s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 121/221 [07:08<06:21, 3.82s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 122/221 [07:11<05:39, 3.42s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 123/221 [07:13<05:05, 3.12s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▍ | 124/221 [07:16<05:02, 3.12s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 125/221 [07:20<05:20, 3.34s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▏ | 126/221 [07:23<05:02, 3.18s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████▌ | 127/221 [07:26<04:47, 3.05s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 128/221 [07:28<04:28, 2.89s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▎ | 129/221 [07:32<04:46, 3.12s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 130/221 [07:35<04:30, 2.98s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 131/221 [07:38<04:44, 3.17s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 132/221 [07:41<04:26, 3.00s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▋ | 133/221 [07:44<04:27, 3.04s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 134/221 [07:47<04:17, 2.96s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 135/221 [07:50<04:20, 3.03s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▊ | 136/221 [07:54<04:35, 3.24s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 137/221 [07:57<04:38, 3.32s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▌ | 138/221 [08:01<04:48, 3.47s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▉ | 139/221 [08:05<04:48, 3.52s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 140/221 [08:07<04:17, 3.18s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 141/221 [08:10<04:14, 3.18s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|██████████████████████████████████████████████��█████ | 142/221 [08:13<04:07, 3.13s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▍ | 143/221 [08:15<03:46, 2.90s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▊ | 144/221 [08:19<04:05, 3.19s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 145/221 [08:22<03:59, 3.15s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 146/221 [08:26<04:10, 3.34s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 147/221 [08:29<03:54, 3.17s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 148/221 [08:32<03:53, 3.19s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 149/221 [08:36<03:52, 3.23s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▉ | 150/221 [08:39<03:49, 3.23s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 151/221 [08:43<03:57, 3.40s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 152/221 [08:46<03:49, 3.32s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████ | 153/221 [08:49<03:43, 3.29s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 154/221 [08:52<03:44, 3.35s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 155/221 [08:56<03:43, 3.38s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▏ | 156/221 [08:59<03:42, 3.42s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 157/221 [09:02<03:25, 3.21s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▉ | 158/221 [09:07<03:49, 3.64s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 159/221 [09:10<03:47, 3.66s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 160/221 [09:14<03:51, 3.79s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████ | 161/221 [09:19<03:54, 3.90s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 162/221 [09:23<03:50, 3.90s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 163/221 [09:27<03:54, 4.05s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 164/221 [09:32<03:59, 4.20s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▍ | 165/221 [09:35<03:44, 4.00s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▊ | 166/221 [09:38<03:24, 3.71s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▏ | 167/221 [09:42<03:17, 3.65s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████��████████████████████████▌ | 168/221 [09:45<03:04, 3.49s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▉ | 169/221 [09:48<03:05, 3.57s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|██████████████████████████████████████████████████████████████▎ | 170/221 [09:52<03:05, 3.63s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|██████████████████████████████████████████████████████████████▋ | 171/221 [09:56<03:02, 3.64s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|███████████████████████████████████████████████████████████████ | 172/221 [09:59<02:53, 3.55s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|███████████████████████████████████████████████████████████████▍ | 173/221 [10:03<02:48, 3.52s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|███████████████████████████████████████████████████████████████▊ | 174/221 [10:06<02:40, 3.41s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|████████████████████████████████████████████████████████████████▏ | 175/221 [10:09<02:33, 3.33s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|████████████████████████████████████████████████████████████████▌ | 176/221 [10:13<02:36, 3.48s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|████████████████████████████████████████████████████████████████▊ | 177/221 [10:16<02:25, 3.31s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|█████████████████████████████████████████████████████████████████▏ | 178/221 [10:20<02:29, 3.47s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|█████████████████████████████████████████████████████████████████▌ | 179/221 [10:23<02:23, 3.42s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|█████████████████████████████████████████████████████████████████▉ | 180/221 [10:27<02:29, 3.65s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|██████████████████████████████████████████████████████████████████▎ | 181/221 [10:31<02:29, 3.73s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|██████████████████████████████████████████████████████████████████▋ | 182/221 [10:35<02:24, 3.70s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|███████████████████████████████████████████████████████████████████ | 183/221 [10:39<02:27, 3.88s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|███████████████████████████████████████████████████████████████████▍ | 184/221 [10:43<02:21, 3.83s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|███████████████████████████████████████████████████████████████████▊ | 185/221 [10:46<02:11, 3.66s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|████████████████████████████████████████████████████████████████████▏ | 186/221 [10:50<02:15, 3.88s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|████████████████████████████████████████████████████████████████████▌ | 187/221 [10:54<02:07, 3.76s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|████████████████████████████████████████████████████████████████████▉ | 188/221 [10:58<02:07, 3.88s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|█████████████████████████████████████████████████████████████████████▎ | 189/221 [11:02<02:06, 3.97s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|█████████████████████████████████████████████████████████████████████▋ | 190/221 [11:07<02:07, 4.11s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|██████████████████████████████████████████████████████████████████████ | 191/221 [11:11<02:07, 4.25s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|██████████████████████████████████████████████████████████████████████▎ | 192/221 [11:15<02:03, 4.25s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|██���███████████████████████████████████████████████████████████████████▋ | 193/221 [11:19<01:50, 3.94s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|███████████████████████████████████████████████████████████████████████ | 194/221 [11:22<01:39, 3.70s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|███████████████████████████████████████████████████████████████████████▍ | 195/221 [11:25<01:31, 3.50s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|███████████████████████████████████████████████████████████████████████▊ | 196/221 [11:28<01:25, 3.43s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|████████████████████████████████████████████████████████████████████████▏ | 197/221 [11:31<01:18, 3.26s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|████████████████████████████████████████████████████████████████████████▌ | 198/221 [11:35<01:21, 3.54s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|████████████████████████████████████████████████████████████████████████▉ | 199/221 [11:39<01:23, 3.80s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|█████████████████████████████████████████████████████████████████████████▎ | 200/221 [11:43<01:17, 3.69s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|█████████████████████████████████████████████████████████████████████████▋ | 201/221 [11:46<01:11, 3.59s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|██████████████████████████████████████████████████████████████████████████ | 202/221 [11:49<01:03, 3.36s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██████████████████████████████████████████████████████████████████████████▍ | 203/221 [11:53<01:01, 3.43s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██████████████████████████████████████████████████████████████████████████▊ | 204/221 [11:57<01:03, 3.73s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|���██████████████████████████████████████████████████████████████████████████▏ | 205/221 [12:02<01:04, 4.06s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|███████████████████████████████████████████████████████████████████████████▌ | 206/221 [12:07<01:03, 4.24s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|███████████████████████████████████████████████████████████████████████████▊ | 207/221 [12:10<00:55, 3.98s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|████████████████████████████████████████████████████████████████████████████▏ | 208/221 [12:14<00:51, 3.94s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|████████████████████████████████████████████████████████████████████████████▌ | 209/221 [12:17<00:44, 3.72s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|████████████████████████████████████████████████████████████████████████████▉ | 210/221 [12:21<00:41, 3.80s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|█████████████████████████████████████████████████████████████████████████████▎ | 211/221 [12:25<00:39, 3.99s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|█████████████████████████████████████████████████████████████████████████████▋ | 212/221 [12:29<00:35, 3.92s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|██████████████████████████████████████████████████████████████████████████████ | 213/221 [12:32<00:29, 3.65s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|██████████████████████████████████████████████████████████████████████████████▍ | 214/221 [12:36<00:25, 3.64s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|██████████████████████████████████████████████████████████████████████████████▊ | 215/221 [12:40<00:22, 3.82s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|███████████████████████████████████████████████████████████████████████████████▏ | 216/221 [12:44<00:19, 3.93s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|███████████████████████████████████████████████████████████████████████████████▌ | 217/221 [12:48<00:15, 3.93s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|███████████████████████████████████████████████████████████████████████████████▉ | 218/221 [12:52<00:11, 3.91s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 99%|████████████████████████████████████████████████████████████████████████████████▎| 219/221 [12:56<00:07, 3.89s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|████████████████████████████████████████████████████████████████████████████████▋| 220/221 [13:00<00:04, 4.08s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +100%|████████████████████████████████████████████████████████████████████████████████▋| 220/221 [13:00<00:04, 4.08s/it]g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/01/2022 11:55:07 - INFO - datasets.metric - Removing /home/sanchit_huggingface_co/.cache/huggingface/metrics/wer/default/default_experiment-1-0.arrow +[INFO|configuration_utils.py:438] 2022-03-01 11:55:07,136 >> Configuration saved in ./checkpoint-500/config.json g-point operations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-01 11:55:12,541 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonerations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-01 11:55:12,541 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonerations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-01 11:55:12,541 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonerations will not be computed-01 11:41:40,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/01/2022 11:56:42 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20220301_103527-1wkgn37c/run-1wkgn37c.wandb']. This may take a bit of time if the files are large.