diff --git "a/wandb/run-20220302_041332-j5suzd56/files/output.log" "b/wandb/run-20220302_041332-j5suzd56/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220302_041332-j5suzd56/files/output.log" @@ -0,0 +1,2446 @@ + + + 0%| | 0/297 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:13:39,818 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:13:42,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:13:45,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:13:47,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:13:50,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:13:53,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:13:55,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 1/297 [00:22<1:48:55, 22.08s/it] + + 0%|▎ | 1/297 [00:22<1:48:55, 22.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:13:58,649 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:01,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:03,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:06,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:08,917 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:11,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:14,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8456, 'learning_rate': 2.0000000000000002e-07, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:16,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 2/297 [00:42<1:44:28, 21.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:14:19,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:21,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:24,283 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:26,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:29,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:31,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:34,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:37,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 3/297 [01:03<1:42:30, 20.92s/it] + + 1%|▊ | 3/297 [01:03<1:42:30, 20.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:14:39,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:42,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:44,738 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:47,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:49,748 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:52,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:54,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:14:57,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 4/297 [01:23<1:40:50, 20.65s/it] + + 1%|█ | 4/297 [01:23<1:40:50, 20.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:14:59,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:02,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:04,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:07,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:09,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:12,335 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:14,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:17,245 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 5/297 [01:43<1:39:15, 20.40s/it] + + 2%|█▎ | 5/297 [01:43<1:39:15, 20.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:15:19,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:22,321 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:24,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:27,230 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:29,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:32,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:34,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8624, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:37,081 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▋ | 6/297 [02:03<1:38:00, 20.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:15:39,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:42,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:44,562 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:46,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:49,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:51,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:54,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8356, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 04:15:56,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▉ | 7/297 [02:23<1:36:52, 20.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:15:59,357 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:01,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:04,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:06,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:09,210 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:11,721 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:14,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:16,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9066, 'learning_rate': 1.4000000000000001e-06, 'epoch': 0.03} + + 3%|██▏ | 8/297 [02:42<1:36:04, 19.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:16:19,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:21,540 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:23,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:26,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:28,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:31,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:33,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:36,002 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 9/297 [03:02<1:35:02, 19.80s/it] + + 3%|██▍ | 9/297 [03:02<1:35:02, 19.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:16:38,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:41,133 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:43,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:45,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:48,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:50,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:53,115 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:16:55,514 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6498, 'learning_rate': 1.8e-06, 'epoch': 0.03} + + 3%|██▋ | 10/297 [03:21<1:34:16, 19.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:16:58,033 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:00,358 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:02,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:05,109 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:07,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:09,850 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:12,205 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.778, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:14,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 4%|██▉ | 11/297 [03:40<1:33:01, 19.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:17:17,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:19,443 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:21,803 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:24,180 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:26,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:28,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:31,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:33,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 12/297 [03:59<1:32:04, 19.38s/it] + + 4%|███▏ | 12/297 [03:59<1:32:04, 19.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:17:36,114 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:38,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:40,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:43,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:45,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:48,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7069, 'learning_rate': 2.4000000000000003e-06, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 04:17:53,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 13/297 [04:19<1:31:43, 19.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 13/297 [04:19<1:31:43, 19.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:00,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:00,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:04,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:04,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:09,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:09,592 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:38<1:30:45, 19.24s/it]g-point operations will not be computed-02 04:17:55,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:38<1:30:45, 19.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 14/297 [04:38<1:30:45, 19.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:18,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:18,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:23,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:23,559 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:28,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:28,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [04:56<1:29:27, 19.03s/it]g-point operations will not be computed-02 04:18:14,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [04:56<1:29:27, 19.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 15/297 [04:56<1:29:27, 19.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:37,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:37,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:42,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:42,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:46,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:46,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:15<1:28:16, 18.85s/it]g-point operations will not be computed-02 04:18:32,948 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:15<1:28:16, 18.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 16/297 [05:15<1:28:16, 18.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:55,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:18:55,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:00,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:00,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:05,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:33<1:27:26, 18.74s/it]g-point operations will not be computed-02 04:18:51,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:33<1:27:26, 18.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 17/297 [05:33<1:27:26, 18.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:14,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:14,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:18,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:18,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:23,273 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:19, 18.57s/it]g-point operations will not be computed-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:19, 18.57s/it]g-point operations will not be computed-02 04:19:09,784 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:19, 18.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 18/297 [05:51<1:26:19, 18.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:32,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:32,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:36,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:36,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:41,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:31, 18.46s/it]g-point operations will not be computed-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:31, 18.46s/it]g-point operations will not be computed-02 04:19:27,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:31, 18.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 19/297 [06:10<1:25:31, 18.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:50,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:50,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:55,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:55,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:59,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:19:59,590 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:28<1:24:40, 18.34s/it]g-point operations will not be computed-02 04:19:46,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:28<1:24:40, 18.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 20/297 [06:28<1:24:40, 18.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:08,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:08,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:13,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:17,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:17,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:17,595 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:46<1:23:54, 18.24s/it]g-point operations will not be computed-02 04:20:04,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:46<1:23:54, 18.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 21/297 [06:46<1:23:54, 18.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:26,635 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:26,635 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:30,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:30,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:35,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:35,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:03<1:22:56, 18.09s/it]g-point operations will not be computed-02 04:20:22,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:03<1:22:56, 18.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 22/297 [07:03<1:22:56, 18.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:44,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:44,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:48,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:48,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:53,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:20:53,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:21<1:22:10, 18.00s/it]g-point operations will not be computed-02 04:20:39,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:21<1:22:10, 18.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▏ | 23/297 [07:21<1:22:10, 18.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:02,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:02,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:06,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:06,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:10,745 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:16, 17.86s/it]g-point operations will not be computed-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:16, 17.86s/it]g-point operations will not be computed-02 04:20:57,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:16, 17.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 24/297 [07:39<1:21:16, 17.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:19,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:19,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:23,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:23,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:27,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:27,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:27,997 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [07:56<1:20:50, 17.83s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 25/297 [07:56<1:20:50, 17.83s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:37,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:37,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:37,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:37,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:21:37,311 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.716, 'learning_rate': 5e-06, 'epoch': 0.09} + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 26/297 [08:14<1:20:11, 17.75s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4488, 'learning_rate': 5.2e-06, 'epoch': 0.09} + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 27/297 [08:31<1:19:05, 17.57s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:18:08, 17.43s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:18:08, 17.43s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 28/297 [08:48<1:18:08, 17.43s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:22:31,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:22:31,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:22:31,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:22:31,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 29/297 [09:05<1:17:24, 17.33s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 29/297 [09:05<1:17:24, 17.33s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3365, 'learning_rate': 5.600000000000001e-06, 'epoch': 0.1} + 10%|███████▊ | 29/297 [09:05<1:17:24, 17.33s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:22:48,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:22:48,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:22:54,481 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4924, 'learning_rate': 5.8e-06, 'epoch': 0.1} + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 30/297 [09:22<1:16:37, 17.22s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▎ | 31/297 [09:39<1:15:37, 17.06s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 32/297 [09:55<1:14:34, 16.88s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 33/297 [10:12<1:13:20, 16.67s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 34/297 [10:27<1:11:49, 16.38s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:34, 16.16s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:34, 16.16s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:34, 16.16s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:34, 16.16s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▍ | 35/297 [10:43<1:10:34, 16.16s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:28,848 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:28,848 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3629, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.12} + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 36/297 [10:58<1:09:16, 15.92s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 37/297 [11:13<1:07:45, 15.64s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:51,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:51,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:51,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:51,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:51,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:51,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:24:51,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:28<1:06:48, 15.48s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:28<1:06:48, 15.48s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:28<1:06:48, 15.48s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 38/297 [11:28<1:06:48, 15.48s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:11,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:11,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:11,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:43<1:04:57, 15.11s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▌ | 39/297 [11:43<1:04:57, 15.11s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4827, 'learning_rate': 7.6e-06, 'epoch': 0.13} + 13%|██████████▌ | 39/297 [11:43<1:04:57, 15.11s/it]g-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:23,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:23,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:23,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:23,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:23,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:21:15,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 40/297 [11:56<1:02:38, 14.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:25:32,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 40/297 [11:56<1:02:38, 14.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:25:32,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▊ | 40/297 [11:56<1:02:38, 14.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:25:32,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:38,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:32,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:38,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:32,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:38,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:32,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:38,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:32,200 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 41/297 [12:09<59:53, 14.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 41/297 [12:09<59:53, 14.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 41/297 [12:09<59:53, 14.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:50,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:50,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:54,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:54,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4631, 'learning_rate': 8.200000000000001e-06, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 04:25:54,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:00,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:03,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:03,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:25:44,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▊ | 43/297 [12:32<53:25, 12.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:26:07,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▊ | 43/297 [12:32<53:25, 12.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:26:07,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2478, 'learning_rate': 8.400000000000001e-06, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:10,990 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:07,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:13,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:07,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:13,392 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:07,191 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 44/297 [12:42<49:53, 11.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▏ | 44/297 [12:42<49:53, 11.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:19,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:21,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:21,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:24,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:24,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:27,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:29,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:31,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:33,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:33,088 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:35,013 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:36,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:38,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:40,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:40,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:42,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:45,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:46,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:46,778 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:48,322 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:51,042 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:53,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:53,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:54,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:57,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:26:57,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4195, 'learning_rate': 9.800000000000001e-06, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:03,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:03,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:08,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:08,276 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:13,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:13,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:13,448 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:27:18,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 52/297 [14:05<56:46, 13.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2035, 'learning_rate': 1.04e-05, 'epoch': 0.18} + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 53/297 [14:25<1:04:20, 15.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2004, 'learning_rate': 1.06e-05, 'epoch': 0.18} + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▌ | 54/297 [14:45<1:09:04, 17.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1688, 'learning_rate': 1.08e-05, 'epoch': 0.18} + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 55/297 [15:05<1:12:12, 17.90s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 56/297 [15:25<1:14:06, 18.45s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3153, 'learning_rate': 1.1200000000000001e-05, 'epoch': 0.19} + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▎ | 57/297 [15:44<1:15:15, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 58/297 [16:04<1:15:52, 19.05s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2806, 'learning_rate': 1.16e-05, 'epoch': 0.2} + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▉ | 59/297 [16:23<1:15:55, 19.14s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2199, 'learning_rate': 1.18e-05, 'epoch': 0.2} + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 60/297 [16:42<1:15:48, 19.19s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2737, 'learning_rate': 1.2e-05, 'epoch': 0.21} + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 61/297 [17:01<1:15:15, 19.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 62/297 [17:20<1:14:45, 19.09s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2967, 'learning_rate': 1.24e-05, 'epoch': 0.21} + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2293, 'learning_rate': 1.2600000000000001e-05, 'epoch': 0.22} + 22%|████████████████���▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▏ | 64/297 [17:59<1:14:12, 19.11s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 65/297 [18:18<1:13:36, 19.04s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3119, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.22} + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▊ | 66/297 [18:36<1:12:55, 18.94s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 67/297 [18:55<1:12:05, 18.81s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 68/297 [19:13<1:11:22, 18.70s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2353, 'learning_rate': 1.3600000000000002e-05, 'epoch': 0.23} + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 69/297 [19:32<1:10:45, 18.62s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1731, 'learning_rate': 1.3800000000000002e-05, 'epoch': 0.24} + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▊ | 70/297 [19:50<1:10:11, 18.55s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:08<1:09:20, 18.41s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:08<1:09:20, 18.41s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1953, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.24} + 24%|███████████████████ | 71/297 [20:08<1:09:20, 18.41s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:08<1:09:20, 18.41s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:08<1:09:20, 18.41s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:08<1:09:20, 18.41s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████ | 71/297 [20:08<1:09:20, 18.41s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3072, 'learning_rate': 1.42e-05, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:34:00,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2183, 'learning_rate': 1.44e-05, 'epoch': 0.25} + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 73/297 [20:44<1:07:46, 18.15s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3211, 'learning_rate': 1.48e-05, 'epoch': 0.25} + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 74/297 [21:02<1:07:00, 18.03s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1977, 'learning_rate': 1.5e-05, 'epoch': 0.26} + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1627, 'learning_rate': 1.52e-05, 'epoch': 0.26} + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 76/297 [21:37<1:05:40, 17.83s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2265, 'learning_rate': 1.54e-05, 'epoch': 0.26} + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 78/297 [22:11<1:03:46, 17.47s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1036, 'learning_rate': 1.56e-05, 'epoch': 0.27} + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 79/297 [22:28<1:02:42, 17.26s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.293, 'learning_rate': 1.58e-05, 'epoch': 0.27} + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▌ | 80/297 [22:45<1:01:56, 17.13s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▊ | 81/297 [23:02<1:01:07, 16.98s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 82/297 [23:18<1:00:16, 16.82s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▉ | 83/297 [23:34<59:07, 16.58s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|███████████████████████▏ | 84/297 [23:50<57:54, 16.31s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:05<56:45, 16.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 85/297 [24:05<56:45, 16.06s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:37:45,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:37:45,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:37:45,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:37:45,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:37:45,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:37:45,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:21<55:41, 15.84s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:21<55:41, 15.84s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:21<55:41, 15.84s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:21<55:41, 15.84s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:21<55:41, 15.84s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:21<55:41, 15.84s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▋ | 86/297 [24:21<55:41, 15.84s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2583, 'learning_rate': 1.7199999999999998e-05, 'epoch': 0.29} +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:09,711 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [24:51<53:34, 15.38s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▎ | 88/297 [24:51<53:34, 15.38s/it]g-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:30,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:30,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:30,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:30,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:30,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:30,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:26:17,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:04<51:52, 14.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:04<51:52, 14.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:04<51:52, 14.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:04<51:52, 14.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▌ | 89/297 [25:04<51:52, 14.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:50,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 90/297 [25:18<49:59, 14.49s/it]g-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 90/297 [25:18<49:59, 14.49s/it]g-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3256, 'learning_rate': 1.78e-05, 'epoch': 0.3} +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:57,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:57,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:57,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:38:57,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:04,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:04,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3338, 'learning_rate': 1.8e-05, 'epoch': 0.31} +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:04,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:10,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:15,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▍ | 92/297 [25:42<45:18, 13.26s/it]g-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▍ | 92/297 [25:42<45:18, 13.26s/it]g-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4136, 'learning_rate': 1.8200000000000002e-05, 'epoch': 0.31} +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:20,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:20,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:24,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:27,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:27,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3547, 'learning_rate': 1.84e-05, 'epoch': 0.31} +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:31,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:31,213 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:34,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:37,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:37,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2947, 'learning_rate': 1.86e-05, 'epoch': 0.32} +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:40,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:43,068 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:45,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:45,296 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:38:40,519 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 95/297 [26:12<36:57, 10.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:39:47,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:49,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:39:47,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:51,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:39:47,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:53,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:39:47,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:53,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:39:47,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▌ | 96/297 [26:20<33:59, 10.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:39:55,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:57,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:39:55,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:39:59,231 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:39:55,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 97/297 [26:28<30:52, 9.26s/it]g-point operations will not be computed-02 04:39:55,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 97/297 [26:28<30:52, 9.26s/it]g-point operations will not be computed-02 04:39:55,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 97/297 [26:28<30:52, 9.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:40:02,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:05,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:02,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 98/297 [26:34<27:36, 8.32s/it]g-point operations will not be computed-02 04:40:02,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 98/297 [26:34<27:36, 8.32s/it]g-point operations will not be computed-02 04:40:02,705 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 98/297 [26:34<27:36, 8.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:40:08,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:11,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:08,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▎ | 99/297 [26:39<24:32, 7.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:40:13,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|████████████���██████████████▎ | 99/297 [26:39<24:32, 7.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:40:13,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:15,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:13,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:17,256 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:13,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:17,256 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:13,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 100/297 [26:44<22:01, 6.71s/it]g-point operations will not be computed-02 04:40:13,983 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 100/297 [26:44<22:01, 6.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▎ | 100/297 [26:44<22:01, 6.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:26,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:26,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:31,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:31,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1591, 'learning_rate': 2e-05, 'epoch': 0.34} +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:40:36,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.217, 'learning_rate': 2.0200000000000003e-05, 'epoch': 0.34} + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 102/297 [27:25<44:47, 13.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.223, 'learning_rate': 2.04e-05, 'epoch': 0.35} + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████ | 103/297 [27:45<50:33, 15.64s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 104/297 [28:05<54:22, 16.90s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2281, 'learning_rate': 2.08e-05, 'epoch': 0.35} + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 105/297 [28:25<56:48, 17.75s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [28:45<58:22, 18.34s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [28:45<58:22, 18.34s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1396, 'learning_rate': 2.1e-05, 'epoch': 0.36} + 36%|████████████████████████████▉ | 106/297 [28:45<58:22, 18.34s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 106/297 [28:45<58:22, 18.34s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:42:30,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:42:30,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:42:30,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1033, 'learning_rate': 2.12e-05, 'epoch': 0.36} + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 107/297 [29:04<59:03, 18.65s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 108/297 [29:23<59:35, 18.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.224, 'learning_rate': 2.16e-05, 'epoch': 0.37} + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2011, 'learning_rate': 2.18e-05, 'epoch': 0.37} + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 109/297 [29:43<59:39, 19.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:21<58:59, 19.03s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:21<58:59, 19.03s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:21<58:59, 19.03s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:21<58:59, 19.03s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:21<58:59, 19.03s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 111/297 [30:21<58:59, 19.03s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:44:11,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1944, 'learning_rate': 2.22e-05, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 112/297 [30:40<58:25, 18.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▊ | 113/297 [30:59<58:30, 19.08s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:18<57:58, 19.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:18<57:58, 19.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.198, 'learning_rate': 2.26e-05, 'epoch': 0.38} + 38%|███████████████████████████████ | 114/297 [31:18<57:58, 19.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:18<57:58, 19.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:18<57:58, 19.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:18<57:58, 19.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|███████████████████████████████ | 114/297 [31:18<57:58, 19.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1551, 'learning_rate': 2.2800000000000002e-05, 'epoch': 0.39} +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:10,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [31:55<56:38, 18.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▋ | 116/297 [31:55<56:38, 18.78s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1715, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.39} +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:36,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:36,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:36,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:36,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:45:36,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1146, 'learning_rate': 2.32e-05, 'epoch': 0.39} + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▉ | 117/297 [32:13<55:40, 18.56s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1539, 'learning_rate': 2.3400000000000003e-05, 'epoch': 0.4} + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 118/297 [32:31<55:00, 18.44s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▍ | 119/297 [32:49<54:23, 18.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2062, 'learning_rate': 2.38e-05, 'epoch': 0.4} + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▋ | 120/297 [33:07<53:55, 18.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 121/297 [33:25<53:16, 18.16s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▎ | 122/297 [33:43<52:37, 18.04s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:01<51:58, 17.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▌ | 123/297 [34:01<51:58, 17.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1026, 'learning_rate': 2.44e-05, 'epoch': 0.41} + 41%|█████████████████████████████████▌ | 123/297 [34:01<51:58, 17.92s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:47:43,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:47:43,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:47:43,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:47:43,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:47:43,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 124/297 [34:18<51:28, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:36<51:10, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:36<51:10, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2935, 'learning_rate': 2.48e-05, 'epoch': 0.42} + 42%|██████████████████████████████████ | 125/297 [34:36<51:10, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 125/297 [34:36<51:10, 17.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:48:19,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:48:19,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:48:19,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:48:19,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:48:19,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████▎ | 126/297 [34:54<50:22, 17.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.241, 'learning_rate': 2.5200000000000003e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▋ | 127/297 [35:11<49:33, 17.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1107, 'learning_rate': 2.54e-05, 'epoch': 0.43} + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▉ | 128/297 [35:28<48:46, 17.32s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2664, 'learning_rate': 2.5600000000000002e-05, 'epoch': 0.43} + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████▏ | 129/297 [35:44<48:08, 17.19s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|█████��█████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 130/297 [36:01<47:20, 17.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 132/297 [36:34<45:52, 16.68s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 133/297 [36:50<45:03, 16.49s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:05<44:01, 16.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:05<44:01, 16.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:05<44:01, 16.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:05<44:01, 16.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:05<44:01, 16.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:05<44:01, 16.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▌ | 134/297 [37:05<44:01, 16.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3296, 'learning_rate': 2.6800000000000004e-05, 'epoch': 0.45} +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:50:55,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [37:36<42:01, 15.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [37:36<42:01, 15.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [37:36<42:01, 15.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████ | 136/297 [37:36<42:01, 15.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:19,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:19,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:19,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:19,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [37:50<40:56, 15.35s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [37:50<40:56, 15.35s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [37:50<40:56, 15.35s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [37:50<40:56, 15.35s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [37:50<40:56, 15.35s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [37:50<40:56, 15.35s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 137/297 [37:50<40:56, 15.35s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:39,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:39,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.102, 'learning_rate': 2.7400000000000002e-05, 'epoch': 0.46} +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:39,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:39,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:39,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:39,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:39,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:53,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:53,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1484, 'learning_rate': 2.7600000000000003e-05, 'epoch': 0.47} +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:53,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:51:53,536 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:02,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:02,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:02,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:02,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▏ | 140/297 [38:33<37:40, 14.40s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:10,192 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:10,192 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:10,192 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:16,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:16,467 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 141/297 [38:45<36:05, 13.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▍ | 141/297 [38:45<36:05, 13.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:22,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:22,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:22,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:22,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:30,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:30,152 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▋ | 142/297 [38:57<34:25, 13.33s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:34,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:34,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:38,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:38,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:42,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:42,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.245, 'learning_rate': 2.84e-05, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:46,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:46,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:50,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:53,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:53,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2863, 'learning_rate': 2.86e-05, 'epoch': 0.48} +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:56,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:52:59,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:01,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 145/297 [39:28<28:25, 11.22s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 145/297 [39:28<28:25, 11.22s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:04,844 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:06,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:09,039 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:11,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:11,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:13,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:15,045 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:16,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:18,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:18,570 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:20,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:23,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:25,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:25,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:26,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:29,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:30,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:30,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:33,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:35,782 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:35,782 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5734, 'learning_rate': 2.98e-05, 'epoch': 0.5} +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:35,782 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:41,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:41,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:46,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:46,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:51,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 04:53:51,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2191, 'learning_rate': 3e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 151/297 [40:22<27:06, 11.14s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1928, 'learning_rate': 3.02e-05, 'epoch': 0.51} + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 152/297 [40:43<33:33, 13.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2266, 'learning_rate': 3.04e-05, 'epoch': 0.51} + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▋ | 153/297 [41:03<37:38, 15.69s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1789, 'learning_rate': 3.08e-05, 'epoch': 0.52} + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 154/297 [41:22<40:13, 16.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▌ | 156/297 [42:01<42:47, 18.21s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.117, 'learning_rate': 3.12e-05, 'epoch': 0.53} + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▊ | 157/297 [42:21<43:16, 18.55s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1724, 'learning_rate': 3.1400000000000004e-05, 'epoch': 0.53} + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 158/297 [42:40<43:28, 18.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████���███████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1841, 'learning_rate': 3.16e-05, 'epoch': 0.53} + 54%|███████████████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 159/297 [42:59<43:23, 18.86s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1549, 'learning_rate': 3.18e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 160/297 [43:18<43:14, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1755, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.54} + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▉ | 161/297 [43:37<42:55, 18.94s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1373, 'learning_rate': 3.2200000000000003e-05, 'epoch': 0.54} + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████���███████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▏ | 162/297 [43:56<42:28, 18.88s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▍ | 163/297 [44:15<42:25, 19.00s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▋ | 164/297 [44:34<41:54, 18.91s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1632, 'learning_rate': 3.3e-05, 'epoch': 0.56} + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████ | 165/297 [44:52<41:20, 18.80s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|██████████████████████████████████████���██████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1625, 'learning_rate': 3.32e-05, 'epoch': 0.56} + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 167/297 [45:29<40:10, 18.54s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▊ | 168/297 [45:47<39:36, 18.42s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0952, 'learning_rate': 3.38e-05, 'epoch': 0.57} + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|██████████████████████████████████████████████ | 169/297 [46:05<39:03, 18.31s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.08, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.58} + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▋ | 171/297 [46:41<38:03, 18.12s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1854, 'learning_rate': 3.4200000000000005e-05, 'epoch': 0.58} + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 172/297 [46:59<37:31, 18.01s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0397, 'learning_rate': 3.4399999999999996e-05, 'epoch': 0.58} + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 173/297 [47:16<36:57, 17.89s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▍ | 174/297 [47:34<36:25, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2331, 'learning_rate': 3.48e-05, 'epoch': 0.59} + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▋ | 175/297 [47:52<36:07, 17.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████���████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████ | 176/297 [48:09<35:33, 17.63s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2239, 'learning_rate': 3.52e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▎ | 177/297 [48:26<34:54, 17.46s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.075, 'learning_rate': 3.54e-05, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▌ | 178/297 [48:43<34:16, 17.28s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 180/297 [49:16<33:02, 16.95s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1456, 'learning_rate': 3.6e-05, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▎ | 181/297 [49:33<32:34, 16.85s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0638, 'learning_rate': 3.62e-05, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 182/297 [49:49<31:55, 16.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████████████████████▉ | 183/297 [50:05<31:12, 16.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:20<30:26, 16.17s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:20<30:26, 16.17s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:20<30:26, 16.17s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:20<30:26, 16.17s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▏ | 184/297 [50:20<30:26, 16.17s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|█████████████████████████████████��████████████████▏ | 184/297 [50:20<30:26, 16.17s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:08,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2108, 'learning_rate': 3.68e-05, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 185/297 [50:36<29:43, 15.93s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [50:51<28:57, 15.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [50:51<28:57, 15.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|██████████████████████████████████████████████████▋ | 186/297 [50:51<28:57, 15.66s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:32,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:32,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:32,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:32,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.18, 'learning_rate': 3.72e-05, 'epoch': 0.63} + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 187/297 [51:06<28:19, 15.45s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 188/297 [51:21<27:52, 15.34s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:58,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:58,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:58,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:58,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:04:58,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:09,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:09,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.124, 'learning_rate': 3.76e-05, 'epoch': 0.64} +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:09,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:09,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:09,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:09,137 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:20,800 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:20,800 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▊ | 190/297 [51:48<25:44, 14.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▊ | 190/297 [51:48<25:44, 14.43s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:27,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:27,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:27,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:33,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:33,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 191/297 [52:00<24:20, 13.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|████████████████████████████████████████████████████ | 191/297 [52:00<24:20, 13.77s/it]g-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:39,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:39,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:43,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:43,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:43,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 04:40:21,246 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 192/297 [52:12<22:58, 13.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 192/297 [52:12<22:58, 13.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:51,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:51,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:55,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 193/297 [52:23<21:31, 12.42s/it]g-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▋ | 193/297 [52:23<21:31, 12.42s/it]g-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:05:59,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:01,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:01,927 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:05,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:05,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:05:47,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▉ | 194/297 [52:32<19:51, 11.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:07,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:09,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:07,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:12,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:07,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:14,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:07,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 195/297 [52:41<18:09, 10.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:16,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▏ | 195/297 [52:41<18:09, 10.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:16,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:18,301 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:16,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:20,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:16,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:22,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:16,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:22,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:16,309 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▍ | 196/297 [52:49<16:32, 9.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:24,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:25,807 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:24,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:27,555 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:24,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 197/297 [52:56<14:59, 9.00s/it]g-point operations will not be computed-02 05:06:24,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▋ | 197/297 [52:56<14:59, 9.00s/it]g-point operations will not be computed-02 05:06:24,026 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:32,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:30,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:34,047 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:30,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 198/297 [53:02<13:25, 8.14s/it]g-point operations will not be computed-02 05:06:30,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████ | 198/297 [53:02<13:25, 8.14s/it]g-point operations will not be computed-02 05:06:30,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:38,415 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:37,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:39,763 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:37,036 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 199/297 [53:07<11:58, 7.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:42,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▎ | 199/297 [53:07<11:58, 7.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:42,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:44,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:42,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:12<10:45, 6.65s/it]g-point operations will not be computed-02 05:06:42,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:12<10:45, 6.65s/it]g-point operations will not be computed-02 05:06:42,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:13<10:45, 6.65s/it]g-point operations will not be computed-02 05:06:42,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:13<10:45, 6.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 200/297 [53:13<10:45, 6.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:55,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:06:55,004 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:07:00,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:07:00,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:07:05,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:07:05,182 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1679, 'learning_rate': 4.02e-05, 'epoch': 0.68} + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 201/297 [53:33<17:30, 10.94s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1416, 'learning_rate': 4.0400000000000006e-05, 'epoch': 0.68} + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▎ | 203/297 [54:13<24:21, 15.55s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▋ | 204/297 [54:33<26:03, 16.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1222, 'learning_rate': 4.08e-05, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▉ | 205/297 [54:53<27:02, 17.64s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1069, 'learning_rate': 4.1e-05, 'epoch': 0.69} + 69%|██████████████████████████████████████████���█████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 206/297 [55:12<27:36, 18.20s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1686, 'learning_rate': 4.12e-05, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████��██████████████████████████████████████▍ | 207/297 [55:32<27:47, 18.53s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1506, 'learning_rate': 4.14e-05, 'epoch': 0.7} + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▋ | 208/297 [55:51<27:46, 18.72s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 209/297 [56:10<27:34, 18.80s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2229, 'learning_rate': 4.18e-05, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▎ | 210/297 [56:29<27:18, 18.83s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.145, 'learning_rate': 4.22e-05, 'epoch': 0.71} + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▌ | 211/297 [56:47<26:54, 18.77s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1839, 'learning_rate': 4.24e-05, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 213/297 [57:25<26:20, 18.82s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 214/297 [57:43<25:52, 18.71s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1811, 'learning_rate': 4.2800000000000004e-05, 'epoch': 0.72} + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▋ | 215/297 [58:02<25:25, 18.60s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1621, 'learning_rate': 4.3e-05, 'epoch': 0.73} +[WARNING|modeling_utils.py:388] 2022-03-02 05:11:58,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:11:58,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:11:58,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:11:58,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:11:58,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:11:58,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████��████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1023, 'learning_rate': 4.32e-05, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▏ | 217/297 [58:38<24:31, 18.39s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0696, 'learning_rate': 4.3400000000000005e-05, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▍ | 218/297 [58:56<24:03, 18.27s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 219/297 [59:14<23:36, 18.16s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████ | 220/297 [59:32<23:12, 18.09s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 221/297 [59:50<22:48, 18.01s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0443, 'learning_rate': 4.4200000000000004e-05, 'epoch': 0.75} + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████ | 222/297 [1:00:07<22:22, 17.90s/it]g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0864, 'learning_rate': 4.44e-05, 'epoch': 0.75} + g-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:14:05,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:14:05,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:14:05,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:14:05,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:14:05,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:14:05,903 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:06:49,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:00:42<21:33, 17.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:00:42<21:33, 17.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:00:42<21:33, 17.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:00:42<21:33, 17.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:00:42<21:33, 17.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:00:42<21:33, 17.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|███████████████████████████████████████████████████████████▌ | 224/297 [1:00:42<21:33, 17.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1176, 'learning_rate': 4.4800000000000005e-05, 'epoch': 0.76} + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1481, 'learning_rate': 4.5e-05, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████ | 226/297 [1:01:18<20:53, 17.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1439, 'learning_rate': 4.52e-05, 'epoch': 0.76} + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|████████████████████████████████████████████████████████████▍ | 227/297 [1:01:35<20:24, 17.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▋ | 228/297 [1:01:52<19:58, 17.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1046, 'learning_rate': 4.5600000000000004e-05, 'epoch': 0.77} + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████��██████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 229/297 [1:02:09<19:32, 17.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0998, 'learning_rate': 4.58e-05, 'epoch': 0.77} + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|█████████████████████████████████████████████████████████████▏ | 230/297 [1:02:26<19:09, 17.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▍ | 231/297 [1:02:42<18:42, 17.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▋ | 232/297 [1:02:59<18:11, 16.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 78%|█████████████████████████████████████████████████████████████▉ | 233/297 [1:03:15<17:45, 16.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▏ | 234/297 [1:03:31<17:15, 16.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:03:47<16:42, 16.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:03:47<16:42, 16.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:03:47<16:42, 16.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▌ | 235/297 [1:03:47<16:42, 16.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:30,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:30,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:30,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0543, 'learning_rate': 4.7e-05, 'epoch': 0.79} + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 79%|██████████████████████████████████████████████████████████████▊ | 236/297 [1:04:02<16:09, 15.89s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████ | 237/297 [1:04:17<15:38, 15.63s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:54,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:54,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:54,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:54,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:54,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:54,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:17:54,856 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:04:32<15:14, 15.50s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:04:32<15:14, 15.50s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▎ | 238/297 [1:04:32<15:14, 15.50s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:13,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:13,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:13,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:13,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:13,473 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:04:46<14:35, 15.09s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|██████████████████████████████████████████████████████��████████▌ | 239/297 [1:04:46<14:35, 15.09s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 80%|███████████████████████████████████████████████████████████████▌ | 239/297 [1:04:46<14:35, 15.09s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:27,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:27,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:27,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:27,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:27,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|███████████████████████████████████████████████████████████████▊ | 240/297 [1:05:00<13:55, 14.65s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:37,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:37,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:37,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:43,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:43,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 241/297 [1:05:13<13:13, 14.17s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████ | 241/297 [1:05:13<13:13, 14.17s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.267, 'learning_rate': 4.8e-05, 'epoch': 0.81} +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:51,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:51,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:51,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:18:57,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 242/297 [1:05:25<12:27, 13.59s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 81%|████████████████████████████████████████████████████████████████▎ | 242/297 [1:05:25<12:27, 13.59s/it]g-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:02,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:02,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:02,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:08,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:08,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:08,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:14:18,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 243/297 [1:05:36<11:38, 12.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:19:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▋ | 243/297 [1:05:36<11:38, 12.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:19:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:16,045 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:18,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:18,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:18,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 244/297 [1:05:47<10:43, 12.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 82%|████████████████████████████████████████████████████████████████▉ | 244/297 [1:05:47<10:43, 12.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:25,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:28,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:30,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:30,539 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1646, 'learning_rate': 4.88e-05, 'epoch': 0.82} +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:33,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:35,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 246/297 [1:06:05<08:54, 10.47s/it]g-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 246/297 [1:06:05<08:54, 10.47s/it]g-point operations will not be computed-02 05:19:22,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▍ | 246/297 [1:06:05<08:54, 10.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:19:40,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:41,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:40,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:43,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:40,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 247/297 [1:06:12<07:59, 9.60s/it]g-point operations will not be computed-02 05:19:40,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 247/297 [1:06:12<07:59, 9.60s/it]g-point operations will not be computed-02 05:19:40,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 83%|█████████████████████████████████████████████████████████████████▋ | 247/297 [1:06:12<07:59, 9.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:19:47,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:50,833 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:47,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:52,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:47,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:52,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:47,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|█████████████████████████████████████████████████████████████████▉ | 248/297 [1:06:19<07:05, 8.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:19:53,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:19:56,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:53,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▏ | 249/297 [1:06:24<06:11, 7.74s/it]g-point operations will not be computed-02 05:19:53,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▏ | 249/297 [1:06:24<06:11, 7.74s/it]g-point operations will not be computed-02 05:19:53,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:20:00,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:59,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:20:02,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:19:59,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:06:30<05:27, 6.96s/it]g-point operations will not be computed-02 05:19:59,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:06:30<05:27, 6.96s/it]g-point operations will not be computed-02 05:19:59,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:06:30<05:27, 6.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 84%|██████████████████████████████████████████████████████████████████▍ | 250/297 [1:06:30<05:27, 6.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:20:12,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:20:12,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:20:17,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:20:17,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:20:22,248 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.24, 'learning_rate': 5e-05, 'epoch': 0.84} + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|██████████████████████████████████████████████████████████████████▊ | 251/297 [1:06:50<08:33, 11.17s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████ | 252/297 [1:07:10<10:21, 13.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0794, 'learning_rate': 5.0400000000000005e-05, 'epoch': 0.85} + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 85%|███████████████████████████████████████████████████████████████████▎ | 253/297 [1:07:30<11:27, 15.62s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|██████████████████���████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▌ | 254/297 [1:07:50<12:04, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|███████████████████████████████████████████████████████████████████▊ | 255/297 [1:08:10<12:24, 17.73s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0868, 'learning_rate': 5.1000000000000006e-05, 'epoch': 0.86} + 86%|████████████████████████████████���███████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 86%|████████████████████████████████████████████████████████████████████ | 256/297 [1:08:29<12:28, 18.25s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|███████████████████████████████████████████████████████████████████��▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▎ | 257/297 [1:08:49<12:23, 18.58s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1002, 'learning_rate': 5.14e-05, 'epoch': 0.87} + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▋ | 258/297 [1:09:08<12:11, 18.75s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 87%|████████████████████████████████████████████████████████████████████▉ | 259/297 [1:09:27<11:56, 18.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▏ | 260/297 [1:09:45<11:35, 18.79s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1637, 'learning_rate': 5.2000000000000004e-05, 'epoch': 0.88} + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▍ | 261/297 [1:10:04<11:16, 18.78s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1978, 'learning_rate': 5.22e-05, 'epoch': 0.88} + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 88%|█████████████████████████████████████████████████████████████████████▋ | 262/297 [1:10:23<10:55, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|█████████████████████████████████████████████████████████████████████▉ | 263/297 [1:10:42<10:40, 18.84s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:00<10:18, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:00<10:18, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:00<10:18, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▏ | 264/297 [1:11:00<10:18, 18.74s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:24:46,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:24:46,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:24:46,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:24:46,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:19<09:55, 18.60s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:19<09:55, 18.60s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:19<09:55, 18.60s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:19<09:55, 18.60s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 89%|██████████████████████████████████████████████████████████████████████▍ | 265/297 [1:11:19<09:55, 18.60s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:25:06,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:25:06,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:25:06,767 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████���██████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|██████████████████████████████████████████████████████████████████████▊ | 266/297 [1:11:37<09:34, 18.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1128, 'learning_rate': 5.3200000000000006e-05, 'epoch': 0.9} + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 90%|███████████████████████████████████████████████████████████████████████▎ | 268/297 [1:12:13<08:50, 18.31s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▌ | 269/297 [1:12:31<08:29, 18.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:12:49<08:09, 18.12s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:12:49<08:09, 18.12s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1749, 'learning_rate': 5.380000000000001e-05, 'epoch': 0.91} + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:12:49<08:09, 18.12s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|███████████████████████████████████████████████████████████████████████▊ | 270/297 [1:12:49<08:09, 18.12s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:26:34,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:26:34,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:26:34,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0349, 'learning_rate': 5.4000000000000005e-05, 'epoch': 0.91} + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 91%|████████████████████████████████████████████████████████████████████████ | 271/297 [1:13:07<07:48, 18.02s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:13:25<07:28, 17.92s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:13:25<07:28, 17.92s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:13:25<07:28, 17.92s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:13:25<07:28, 17.92s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|██████████████████████████████████████████████████████████████��█████████▎ | 272/297 [1:13:25<07:28, 17.92s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▎ | 272/297 [1:13:25<07:28, 17.92s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0783, 'learning_rate': 5.440000000000001e-05, 'epoch': 0.92} +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:27:14,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0646, 'learning_rate': 5.4600000000000006e-05, 'epoch': 0.92} + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 92%|████████████████████████████████████████████████████████████████████████▉ | 274/297 [1:13:59<06:45, 17.64s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1936, 'learning_rate': 5.4800000000000004e-05, 'epoch': 0.92} + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████���███████████████████████████████▏ | 275/297 [1:14:17<06:28, 17.68s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1413, 'learning_rate': 5.500000000000001e-05, 'epoch': 0.93} + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▍ | 276/297 [1:14:34<06:08, 17.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|███████��█████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 93%|█████████████████████████████████████████████████████████████████████████▋ | 277/297 [1:14:51<05:47, 17.37s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|█████████████████████████████████████████████████████████████████████████▉ | 278/297 [1:15:08<05:26, 17.19s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.079, 'learning_rate': 5.560000000000001e-05, 'epoch': 0.94} + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▏ | 279/297 [1:15:25<05:05, 17.00s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0415, 'learning_rate': 5.580000000000001e-05, 'epoch': 0.94} + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 94%|██████████████████████████████████████████████████████████████████████████▍ | 280/297 [1:15:41<04:46, 16.85s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.125, 'learning_rate': 5.6000000000000006e-05, 'epoch': 0.94} + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|████████████████████████████████████��█████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|██████████████████████████████████████████████████████████████████████████▋ | 281/297 [1:15:58<04:27, 16.70s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████���███████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████ | 282/297 [1:16:13<04:06, 16.45s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:16:29<03:46, 16.15s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:16:29<03:46, 16.15s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:16:29<03:46, 16.15s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 95%|███████████████████████████████████████████████████████████████████████████▎ | 283/297 [1:16:29<03:46, 16.15s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:12,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:12,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:12,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:16:44<03:25, 15.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:16:44<03:25, 15.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.041, 'learning_rate': 5.66e-05, 'epoch': 0.96} + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:16:44<03:25, 15.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:16:44<03:25, 15.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:16:44<03:25, 15.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▌ | 284/297 [1:16:44<03:25, 15.82s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:31,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:16:59<03:06, 15.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:16:59<03:06, 15.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1417, 'learning_rate': 5.68e-05, 'epoch': 0.96} + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:16:59<03:06, 15.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:16:59<03:06, 15.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:16:59<03:06, 15.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:16:59<03:06, 15.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 96%|███████████████████████████████████████████████████████████████████████████▊ | 285/297 [1:16:59<03:06, 15.52s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:47,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:47,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1461, 'learning_rate': 5.6999999999999996e-05, 'epoch': 0.96} +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:47,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:47,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:47,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:47,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:30:47,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:01,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:01,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0791, 'learning_rate': 5.72e-05, 'epoch': 0.97} +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:01,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:01,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:01,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:01,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:13,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▌ | 288/297 [1:17:41<02:10, 14.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▌ | 288/297 [1:17:41<02:10, 14.53s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0781, 'learning_rate': 5.74e-05, 'epoch': 0.97} +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:20,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:20,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:20,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:26,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▊ | 289/297 [1:17:53<01:51, 13.89s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 97%|████████████████████████████████████████████████████████████████████████████▊ | 289/297 [1:17:53<01:51, 13.89s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0942, 'learning_rate': 5.76e-05, 'epoch': 0.97} +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:32,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:32,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:36,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:36,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▏ | 290/297 [1:18:05<01:32, 13.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▏ | 290/297 [1:18:05<01:32, 13.21s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:42,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:42,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:46,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:48,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▍ | 291/297 [1:18:16<01:15, 12.50s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▍ | 291/297 [1:18:16<01:15, 12.50s/it]g-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:52,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:52,772 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:56,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:31:58,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:20:06,871 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▋ | 292/297 [1:18:26<00:58, 11.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 98%|█████████████████████████████████████████████████████████████████████████████▋ | 292/297 [1:18:26<00:58, 11.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2685, 'learning_rate': 5.82e-05, 'epoch': 0.98} +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:04,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:06,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:08,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:08,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:10,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:12,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:14,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:16,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:16,368 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:18,226 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:19,928 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:23,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:23,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:24,798 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:26,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:26,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:29,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:30,538 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:32,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 05:32:32,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5069, 'learning_rate': 5.92e-05, 'epoch': 1.0} +[INFO|configuration_utils.py:438] 2022-03-02 05:32:34,192 >> Configuration saved in ./config.json19:00<00:00, 15.96s/it]g-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|configuration_utils.py:438] 2022-03-02 05:32:50,588 >> Configuration saved in ./config.jsoncessor_config.jsons/it]g-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-02 05:33:07,006 >> Configuration saved in ./preprocessor_config.jsons/it]g-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|feature_extraction_utils.py:324] 2022-03-02 05:33:07,006 >> Configuration saved in ./preprocessor_config.jsons/it]g-point operations will not be computed-02 05:32:01,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/02/2022 05:34:16 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20220302_041332-j5suzd56/run-j5suzd56.wandb']. This may take a bit of time if the files are large.