diff --git "a/wandb/run-20220301_002446-2vmlu6y4/files/output.log" "b/wandb/run-20220301_002446-2vmlu6y4/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220301_002446-2vmlu6y4/files/output.log" @@ -0,0 +1,1693 @@ + + + 0%| | 0/254 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:24:54,658 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:24:57,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:00,809 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:03,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:07,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:10,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7849, 'learning_rate': 2e-08, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:13,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 1/254 [00:25<1:47:24, 25.47s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:25:16,404 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:19,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:22,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:25,277 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:28,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:31,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:34,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7959, 'learning_rate': 4e-08, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:37,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 2/254 [00:49<1:42:49, 24.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:25:40,022 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:42,868 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:45,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:48,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:51,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:54,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:25:57,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9394, 'learning_rate': 6.000000000000001e-08, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:00,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 3/254 [01:12<1:39:54, 23.88s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:26:03,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:05,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:08,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:11,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:14,549 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:17,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:20,338 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:23,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 4/254 [01:35<1:38:00, 23.52s/it] + + 2%|█▎ | 4/254 [01:35<1:38:00, 23.52s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:26:26,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:28,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:31,779 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:34,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:37,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:40,193 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:42,994 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:45,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 5/254 [01:58<1:36:15, 23.20s/it] + + 2%|█▌ | 5/254 [01:58<1:36:15, 23.20s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:26:48,701 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:51,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:54,352 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:57,104 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:26:59,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:02,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:05,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:08,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 6/254 [02:20<1:35:03, 23.00s/it] + + 2%|█▉ | 6/254 [02:20<1:35:03, 23.00s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:27:11,264 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:14,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:16,930 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:19,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:22,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:25,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:28,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.875, 'learning_rate': 1.2000000000000002e-07, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:30,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 7/254 [02:43<1:33:58, 22.83s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:27:33,765 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:36,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:39,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:42,302 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:45,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:47,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:50,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9746, 'learning_rate': 1.4e-07, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:53,456 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▌ | 8/254 [03:05<1:33:17, 22.75s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:27:56,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:27:59,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:01,844 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:04,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:07,386 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:10,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:12,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:15,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 9/254 [03:27<1:32:15, 22.59s/it] + + 4%|██▊ | 9/254 [03:27<1:32:15, 22.59s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:28:18,606 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:21,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:24,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:26,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:29,717 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:32,432 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:35,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:37,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 10/254 [03:50<1:31:19, 22.46s/it] + + 4%|███▏ | 10/254 [03:50<1:31:19, 22.46s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:28:40,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:43,346 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:45,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:48,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:51,400 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:54,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:56,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:28:59,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 11/254 [04:11<1:29:56, 22.21s/it] + + 4%|███▍ | 11/254 [04:11<1:29:56, 22.21s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:29:02,287 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:04,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:07,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:10,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:13,096 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:15,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:18,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9189, 'learning_rate': 2.2e-07, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:21,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 5%|███▊ | 12/254 [04:33<1:28:52, 22.04s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:29:23,906 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:26,674 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:29,420 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:32,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:35,341 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:38,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:40,763 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8467, 'learning_rate': 2.4000000000000003e-07, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:43,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [04:55<1:28:52, 22.13s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 13/254 [04:55<1:28:52, 22.13s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:51,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:51,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:57,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:29:57,066 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:02,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:02,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:17<1:27:55, 21.98s/it]g-point operations will not be computed-01 00:29:46,295 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:17<1:27:55, 21.98s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 14/254 [05:17<1:27:55, 21.98s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:13,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:13,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:18,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:18,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:23,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:38<1:26:34, 21.73s/it]g-point operations will not be computed-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:38<1:26:34, 21.73s/it]g-point operations will not be computed-01 00:30:07,853 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:38<1:26:34, 21.73s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 15/254 [05:38<1:26:34, 21.73s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:34,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:34,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:39,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:39,628 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:44,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:59<1:25:33, 21.57s/it]g-point operations will not be computed-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:59<1:25:33, 21.57s/it]g-point operations will not be computed-01 00:30:29,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:59<1:25:33, 21.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 16/254 [05:59<1:25:33, 21.57s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:55,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:30:55,452 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:00,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:00,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:06,053 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:20<1:24:45, 21.46s/it]g-point operations will not be computed-01 00:30:50,177 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:20<1:24:45, 21.46s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 17/254 [06:20<1:24:45, 21.46s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:16,522 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:16,522 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:21,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:21,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:26,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:26,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:26,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:41<1:23:39, 21.27s/it]g-point operations will not be computed-01 00:31:11,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:41<1:23:39, 21.27s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 18/254 [06:41<1:23:39, 21.27s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:37,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:37,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:42,658 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:42,658 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:47,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:47,852 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:02<1:22:53, 21.16s/it]g-point operations will not be computed-01 00:31:32,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:02<1:22:53, 21.16s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 19/254 [07:02<1:22:53, 21.16s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:58,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:31:58,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:03,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:03,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:08,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:08,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:23<1:21:54, 21.00s/it]g-point operations will not be computed-01 00:31:53,103 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:23<1:21:54, 21.00s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 20/254 [07:23<1:21:54, 21.00s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:18,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:18,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:23,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:23,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:29,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:29,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:43<1:21:00, 20.86s/it]g-point operations will not be computed-01 00:32:13,689 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:43<1:21:00, 20.86s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 21/254 [07:43<1:21:00, 20.86s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:39,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:39,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:44,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:44,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:49,357 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:49,357 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:04<1:20:04, 20.71s/it]g-point operations will not be computed-01 00:32:34,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:04<1:20:04, 20.71s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 22/254 [08:04<1:20:04, 20.71s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:59,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:32:59,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:04,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:04,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:09,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:24<1:19:17, 20.60s/it]g-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:24<1:19:17, 20.60s/it]g-point operations will not be computed-01 00:32:54,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:24<1:19:17, 20.60s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 23/254 [08:24<1:19:17, 20.60s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:19,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:19,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:24,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:24,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:29,963 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:14,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:44<1:18:31, 20.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▌ | 24/254 [08:44<1:18:31, 20.48s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:39,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:39,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:44,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:44,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:44,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:33:44,920 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 25/254 [09:05<1:18:01, 20.44s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 26/254 [09:24<1:16:47, 20.21s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7962, 'learning_rate': 5.2e-07, 'epoch': 0.11} + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 27/254 [09:44<1:15:45, 20.03s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 28/254 [10:03<1:14:52, 19.88s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7152, 'learning_rate': 5.6e-07, 'epoch': 0.11} + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8741, 'learning_rate': 5.800000000000001e-07, 'epoch': 0.12} + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|█████████▏ | 29/254 [10:23<1:13:58, 19.73s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8711, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.12} + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 31/254 [11:01<1:12:00, 19.38s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8284, 'learning_rate': 6.4e-07, 'epoch': 0.13} + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 32/254 [11:20<1:11:02, 19.20s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7808, 'learning_rate': 6.6e-07, 'epoch': 0.13} + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7802, 'learning_rate': 6.800000000000001e-07, 'epoch': 0.14} + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 35/254 [12:14<1:07:01, 18.36s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8049, 'learning_rate': 7.000000000000001e-07, 'epoch': 0.14} + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8782, 'learning_rate': 7.2e-07, 'epoch': 0.15} + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 37/254 [12:48<1:04:16, 17.77s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:06<1:03:25, 17.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:06<1:03:25, 17.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:06<1:03:25, 17.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 38/254 [13:06<1:03:25, 17.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:03,842 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:03,842 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:03,842 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9045, 'learning_rate': 7.6e-07, 'epoch': 0.15} + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████▎ | 39/254 [13:22<1:01:24, 17.14s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 40/254 [13:37<59:16, 16.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 40/254 [13:37<59:16, 16.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7609, 'learning_rate': 7.8e-07, 'epoch': 0.16} + 16%|████████████▉ | 40/254 [13:37<59:16, 16.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 40/254 [13:37<59:16, 16.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 40/254 [13:37<59:16, 16.62s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:36,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:36,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [13:51<56:40, 15.96s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 41/254 [13:51<56:40, 15.96s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8423, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.16} + 16%|█████████████▏ | 41/254 [13:51<56:40, 15.96s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:46,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:46,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:46,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:46,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 42/254 [14:05<53:50, 15.24s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 42/254 [14:05<53:50, 15.24s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:56,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:56,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:38:56,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:02,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:02,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:02,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▉ | 43/254 [14:17<50:37, 14.39s/it]g-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:08,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:08,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:12,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:12,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:16,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:16,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.942, 'learning_rate': 8.6e-07, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:20,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:20,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:24,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:24,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:27,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:27,158 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:30,816 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:33,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:35,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:35,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:33:35,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▊ | 46/254 [14:48<39:56, 11.52s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:39,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:39,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:42,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:42,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:44,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:46,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:48,323 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:51,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:51,732 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:53,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:55,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:57,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:57,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:39:59,379 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:01,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:03,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:03,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1212, 'learning_rate': 9.800000000000001e-07, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:09,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:09,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:15,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:15,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:21,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:40:21,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████���███████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6298, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.2} + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▍ | 51/254 [15:40<42:48, 12.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|██████████��█████▊ | 52/254 [16:03<53:37, 15.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4993, 'learning_rate': 1.04e-06, 'epoch': 0.21} + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 53/254 [16:27<1:00:52, 18.17s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5422, 'learning_rate': 1.06e-06, 'epoch': 0.21} + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|█████████████████ | 54/254 [16:50<1:05:23, 19.62s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 55/254 [17:12<1:08:15, 20.58s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|██████████████��██▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▋ | 56/254 [17:35<1:10:00, 21.21s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5453, 'learning_rate': 1.12e-06, 'epoch': 0.22} + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▉ | 57/254 [17:58<1:10:52, 21.59s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.555, 'learning_rate': 1.14e-06, 'epoch': 0.23} + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 58/254 [18:20<1:11:24, 21.86s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5982, 'learning_rate': 1.1600000000000001e-06, 'epoch': 0.23} + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5592, 'learning_rate': 1.1800000000000001e-06, 'epoch': 0.24} + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▌ | 59/254 [18:42<1:11:22, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4864, 'learning_rate': 1.2200000000000002e-06, 'epoch': 0.24} + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 61/254 [19:26<1:10:38, 21.96s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5643, 'learning_rate': 1.2400000000000002e-06, 'epoch': 0.25} + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 63/254 [20:10<1:09:57, 21.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.564, 'learning_rate': 1.26e-06, 'epoch': 0.25} + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 64/254 [20:32<1:09:08, 21.83s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 65/254 [20:53<1:08:22, 21.71s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.565, 'learning_rate': 1.3e-06, 'epoch': 0.26} + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▊ | 66/254 [21:14<1:07:42, 21.61s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|█████████████████████ | 67/254 [21:36<1:07:04, 21.52s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5654, 'learning_rate': 1.34e-06, 'epoch': 0.27} + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 68/254 [21:57<1:06:16, 21.38s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▋ | 69/254 [22:18<1:05:35, 21.27s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████ | 70/254 [22:39<1:04:43, 21.11s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▎ | 71/254 [22:59<1:03:59, 20.98s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 72/254 [23:20<1:03:09, 20.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 73/254 [23:40<1:02:14, 20.64s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 74/254 [24:00<1:01:26, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5188, 'learning_rate': 1.48e-06, 'epoch': 0.29} + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 75/254 [24:21<1:01:06, 20.48s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5556, 'learning_rate': 1.5e-06, 'epoch': 0.3} + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▉ | 76/254 [24:40<1:00:13, 20.30s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4649, 'learning_rate': 1.52e-06, 'epoch': 0.3} + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|████████████████████████▊ | 77/254 [25:00<59:13, 20.08s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4939, 'learning_rate': 1.54e-06, 'epoch': 0.31} + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████���█████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▏ | 78/254 [25:19<58:13, 19.85s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▌ | 79/254 [25:38<57:18, 19.65s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|█████████████████████████▊ | 80/254 [25:57<56:18, 19.41s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▏ | 81/254 [26:16<55:21, 19.20s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4473, 'learning_rate': 1.6200000000000002e-06, 'epoch': 0.32} + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██████████████████████████▍ | 82/254 [26:35<54:29, 19.01s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4477, 'learning_rate': 1.6400000000000002e-06, 'epoch': 0.33} + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▊ | 83/254 [26:53<53:37, 18.82s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████ | 84/254 [27:11<52:26, 18.51s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5565, 'learning_rate': 1.6800000000000002e-06, 'epoch': 0.33} + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|███████████████████████████▍ | 85/254 [27:28<51:25, 18.26s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:46<50:12, 17.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▊ | 86/254 [27:46<50:12, 17.93s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:38,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:38,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:38,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:46,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:46,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:46,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:46,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4815, 'learning_rate': 1.72e-06, 'epoch': 0.34} +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:54,792 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:52:54,792 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:53:01,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:53:01,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:53:01,362 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4657, 'learning_rate': 1.74e-06, 'epoch': 0.35} + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 88/254 [28:19<48:00, 17.35s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5689, 'learning_rate': 1.76e-06, 'epoch': 0.35} + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▋ | 89/254 [28:35<46:27, 16.90s/it]g-point operations will not be computed-01 00:39:37,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [28:50<44:36, 16.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [28:50<44:36, 16.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [28:50<44:36, 16.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [28:50<44:36, 16.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|█████████████████████████████ | 90/254 [28:50<44:36, 16.32s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:53:50,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:53:50,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:04<42:23, 15.61s/it]g-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:04<42:23, 15.61s/it]g-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:04<42:23, 15.61s/it]g-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▍ | 91/254 [29:04<42:23, 15.61s/it]g-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:00,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:00,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:00,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:00,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:53:40,101 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▋ | 92/254 [29:17<40:02, 14.83s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▋ | 92/254 [29:17<40:02, 14.83s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▋ | 92/254 [29:17<40:02, 14.83s/it][WARNING|modeling_utils.py:388] 2022-03-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:12,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:12,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:17,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:17,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5608, 'learning_rate': 1.8400000000000002e-06, 'epoch': 0.36} +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:17,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:22,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:26,978 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:26,978 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▎ | 94/254 [29:40<34:57, 13.11s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:30,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:33,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:33,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:37,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:37,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|██████████████████████████████▋ | 95/254 [29:50<32:12, 12.15s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:40,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:42,833 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:44,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:47,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:47,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:49,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:51,147 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:53,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:54,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:54,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:56,797 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:54:58,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:00,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:00,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:03,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:04,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:06,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:06,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:09,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:11,555 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:13,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:13,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8874, 'learning_rate': 1.98e-06, 'epoch': 0.39} +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:19,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:19,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:25,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:25,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:31,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:31,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 00:55:31,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 101/254 [30:49<31:42, 12.43s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3656, 'learning_rate': 2.02e-06, 'epoch': 0.4} + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▌ | 102/254 [31:12<39:35, 15.63s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4021, 'learning_rate': 2.04e-06, 'epoch': 0.4} + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 103/254 [31:35<44:43, 17.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3638, 'learning_rate': 2.06e-06, 'epoch': 0.41} + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 104/254 [31:58<48:03, 19.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▍ | 105/254 [32:20<50:13, 20.22s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▊ | 106/254 [32:43<51:27, 20.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2527, 'learning_rate': 2.12e-06, 'epoch': 0.42} + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|██████████████████████████████████ | 107/254 [33:05<52:12, 21.31s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▍ | 108/254 [33:27<52:31, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.361, 'learning_rate': 2.16e-06, 'epoch': 0.43} + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|██████████████████████████████████▊ | 109/254 [33:49<52:35, 21.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3853, 'learning_rate': 2.1800000000000003e-06, 'epoch': 0.43} + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 43%|███████████████████████████████████ | 110/254 [34:11<52:15, 21.77s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3134, 'learning_rate': 2.2e-06, 'epoch': 0.44} + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▍ | 111/254 [34:33<51:44, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3303, 'learning_rate': 2.2200000000000003e-06, 'epoch': 0.44} + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|███████████████████████████████████▋ | 112/254 [34:54<51:22, 21.71s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2543, 'learning_rate': 2.24e-06, 'epoch': 0.44} + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 44%|████████████████████████████████████ | 113/254 [35:16<51:15, 21.81s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▎ | 114/254 [35:38<50:43, 21.74s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|██████��█████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 45%|████████████████████████████████████▋ | 115/254 [35:59<50:01, 21.59s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2974, 'learning_rate': 2.3000000000000004e-06, 'epoch': 0.46} + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|████████████████████████████████████▉ | 116/254 [36:20<49:20, 21.45s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.391, 'learning_rate': 2.3200000000000002e-06, 'epoch': 0.46} + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▎ | 117/254 [36:41<48:42, 21.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 46%|█████████████████████████████████████▋ | 118/254 [37:02<48:01, 21.19s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|█████████████████████████████████████▉ | 119/254 [37:23<47:22, 21.05s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 47%|██████████████████████████████████████▎ | 120/254 [37:44<46:41, 20.91s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|█���████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▌ | 121/254 [38:04<45:59, 20.75s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|██████████████████████████████████████▉ | 122/254 [38:24<45:24, 20.64s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3083, 'learning_rate': 2.4400000000000004e-06, 'epoch': 0.48} + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 48%|███████████████████████████████████████▏ | 123/254 [38:44<44:40, 20.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3106, 'learning_rate': 2.46e-06, 'epoch': 0.49} + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▌ | 124/254 [39:04<44:02, 20.33s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4078, 'learning_rate': 2.4800000000000004e-06, 'epoch': 0.49} + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 49%|███████████████████████████████████████▊ | 125/254 [39:25<43:41, 20.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|█���██████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▏ | 126/254 [39:44<42:56, 20.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▌ | 127/254 [40:04<42:09, 19.92s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2952, 'learning_rate': 2.5400000000000002e-06, 'epoch': 0.5} + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 50%|████████████████████████████████████████▊ | 128/254 [40:23<41:29, 19.76s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▏ | 129/254 [40:42<40:43, 19.55s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3936, 'learning_rate': 2.5800000000000003e-06, 'epoch': 0.51} + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 51%|█████████████████████████████████████████▍ | 130/254 [41:01<40:00, 19.36s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3896, 'learning_rate': 2.6e-06, 'epoch': 0.51} + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|█████████████████████████████████████████▊ | 131/254 [41:20<39:12, 19.13s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2743, 'learning_rate': 2.6200000000000003e-06, 'epoch': 0.52} + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████ | 132/254 [41:38<38:37, 18.99s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3252, 'learning_rate': 2.64e-06, 'epoch': 0.52} + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████���███████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 52%|██████████████████████████████████████████▍ | 133/254 [41:57<37:54, 18.80s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|██████████████████████████████████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|████████████████████████████���█████████████▋ | 134/254 [42:14<36:55, 18.46s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 53%|███████████████████████████████████████████ | 135/254 [42:32<36:02, 18.17s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3753, 'learning_rate': 2.7000000000000004e-06, 'epoch': 0.53} + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▎ | 136/254 [42:49<35:06, 17.86s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|███████████████████████████████████████████▋ | 137/254 [43:06<34:10, 17.53s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1952, 'learning_rate': 2.7400000000000004e-06, 'epoch': 0.54} + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 54%|████████████████████████████████████████████ | 138/254 [43:23<33:29, 17.32s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:38<32:16, 16.84s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:38<32:16, 16.84s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:38<32:16, 16.84s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:38<32:16, 16.84s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:38<32:16, 16.84s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:38<32:16, 16.84s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 55%|████████████████████████████████████████████▎ | 139/254 [43:38<32:16, 16.84s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:08:41,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:08:41,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4316, 'learning_rate': 2.7800000000000005e-06, 'epoch': 0.55} +[WARNING|modeling_utils.py:388] 2022-03-01 01:08:41,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:08:41,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:08:41,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:08:41,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:08:53,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:07<29:20, 15.58s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|████████████████████████████████████████████▉ | 141/254 [44:07<29:20, 15.58s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4044, 'learning_rate': 2.8000000000000003e-06, 'epoch': 0.55} + 56%|████████████████████████████████████████████▉ | 141/254 [44:07<29:20, 15.58s/it]g-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:02,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:02,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:02,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:02,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:02,351 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 00:54:06,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 142/254 [44:20<27:43, 14.85s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:09:10,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 142/254 [44:20<27:43, 14.85s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:09:10,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▎ | 142/254 [44:20<27:43, 14.85s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:09:10,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:16,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:10,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:16,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:10,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:16,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:10,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:16,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:10,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 143/254 [44:33<26:01, 14.07s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 56%|█████████████████████████████████████████████▌ | 143/254 [44:33<26:01, 14.07s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:26,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:26,754 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:30,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [44:44<24:12, 13.21s/it]g-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 57%|█████████████████████████████████████████████▉ | 144/254 [44:44<24:12, 13.21s/it]g-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:34,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:34,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:38,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:38,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:42,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:42,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:44,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:46,682 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:48,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:50,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:50,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3764, 'learning_rate': 2.9e-06, 'epoch': 0.57} +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:54,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:09:55,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 147/254 [45:10<18:09, 10.18s/it]g-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 147/254 [45:10<18:09, 10.18s/it]g-point operations will not be computed-01 01:09:22,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|██████████████████████████████████████████████▉ | 147/254 [45:10<18:09, 10.18s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:09:59,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:01,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:59,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:04,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:59,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:04,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:09:59,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 58%|███████████████████████████████████████████████▏ | 148/254 [45:17<16:13, 9.19s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:10:06,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:07,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:06,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:10,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:06,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:10,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:06,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▌ | 149/254 [45:23<14:20, 8.20s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:10:12,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:14,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:12,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▊ | 150/254 [45:29<12:47, 7.38s/it]g-point operations will not be computed-01 01:10:12,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▊ | 150/254 [45:29<12:47, 7.38s/it]g-point operations will not be computed-01 01:10:12,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8156, 'learning_rate': 2.9800000000000003e-06, 'epoch': 0.59} + 59%|███████████████████████████████████████████████▊ | 150/254 [45:29<12:47, 7.38s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|███████████████████████████████████████████████▊ | 150/254 [45:29<12:47, 7.38s/it][WARNING|modeling_utils.py:388] 2022-03-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:26,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:26,371 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:32,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:32,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:10:32,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2784, 'learning_rate': 3e-06, 'epoch': 0.59} + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 59%|████████████████████████████████████████████████▏ | 151/254 [45:53<21:22, 12.45s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3132, 'learning_rate': 3.0200000000000003e-06, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▍ | 152/254 [46:16<26:42, 15.71s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3138, 'learning_rate': 3.04e-06, 'epoch': 0.6} + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 60%|████████████████████████████████████████████████▊ | 153/254 [46:39<30:04, 17.86s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████ | 154/254 [47:02<32:10, 19.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▍ | 155/254 [47:24<33:20, 20.21s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2817, 'learning_rate': 3.1000000000000004e-06, 'epoch': 0.61} + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 61%|█████████████████████████████████████████████████▋ | 156/254 [47:46<34:01, 20.83s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2329, 'learning_rate': 3.12e-06, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████ | 157/254 [48:09<34:20, 21.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.258, 'learning_rate': 3.1600000000000002e-06, 'epoch': 0.62} + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 62%|██████████████████████████████████████████████████▍ | 158/254 [48:31<34:22, 21.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████ | 160/254 [49:15<34:00, 21.70s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2045, 'learning_rate': 3.2000000000000003e-06, 'epoch': 0.63} + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 63%|███████████████████████████████████████████████████▎ | 161/254 [49:36<33:31, 21.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2949, 'learning_rate': 3.2200000000000005e-06, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▋ | 162/254 [49:58<33:10, 21.64s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3154, 'learning_rate': 3.2400000000000003e-06, 'epoch': 0.64} + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 64%|███████████████████████████████████████████████████▉ | 163/254 [50:20<32:57, 21.73s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2465, 'learning_rate': 3.2600000000000006e-06, 'epoch': 0.64} + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▎ | 164/254 [50:41<32:20, 21.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2751, 'learning_rate': 3.3000000000000006e-06, 'epoch': 0.65} + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 65%|████████████████████████████████████████████████████▌ | 165/254 [51:02<31:46, 21.42s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▎ | 167/254 [51:44<30:43, 21.19s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|███████████████████████████████████████████████��█████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2938, 'learning_rate': 3.3400000000000006e-06, 'epoch': 0.66} + 66%|█████████████████████████████████████████████████████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 66%|█████████████████████████████████████████████████████▌ | 168/254 [52:05<30:10, 21.05s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2577, 'learning_rate': 3.3600000000000004e-06, 'epoch': 0.66} + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|███████████████████���█████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|█████████████████████████████████████████████████████▉ | 169/254 [52:25<29:40, 20.94s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2641, 'learning_rate': 3.3800000000000007e-06, 'epoch': 0.67} + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▏ | 170/254 [52:46<29:06, 20.79s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|████████���█████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 67%|██████████████████████████████████████████████████████▌ | 171/254 [53:06<28:32, 20.63s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1892, 'learning_rate': 3.4200000000000007e-06, 'epoch': 0.67} + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|██████████████████████████████████████████████████████▊ | 172/254 [53:26<27:59, 20.48s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.248, 'learning_rate': 3.44e-06, 'epoch': 0.68} + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 68%|███████████████████████████████████████████████████████▏ | 173/254 [53:46<27:24, 20.30s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▍ | 174/254 [54:06<26:52, 20.16s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2812, 'learning_rate': 3.48e-06, 'epoch': 0.69} + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|███████████████████████████████████████████████████████▊ | 175/254 [54:26<26:39, 20.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 69%|████████████████████████████████████████████████████████▏ | 176/254 [54:46<26:04, 20.06s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▍ | 177/254 [55:05<25:28, 19.85s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|████████████████████████████████████████████████████████▊ | 178/254 [55:24<24:54, 19.66s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1785, 'learning_rate': 3.5600000000000002e-06, 'epoch': 0.7} + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 70%|█████████████████████████████████████████████████████████ | 179/254 [55:43<24:20, 19.47s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3392, 'learning_rate': 3.58e-06, 'epoch': 0.71} + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 71%|█████████████████████████████████████████████████████████▋ | 181/254 [56:21<23:19, 19.17s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|███████████���██████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1624, 'learning_rate': 3.62e-06, 'epoch': 0.71} + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████ | 182/254 [56:40<22:46, 18.98s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 72%|██████████████████████████████████████████████████████████▎ | 183/254 [56:58<22:13, 18.78s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3062, 'learning_rate': 3.66e-06, 'epoch': 0.72} + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:22:19,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:22:19,942 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|██████████████████████████████████████████████████████████▉ | 185/254 [57:34<21:03, 18.31s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2613, 'learning_rate': 3.7e-06, 'epoch': 0.73} + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 73%|███████████████████████████████████████████████████████████▎ | 186/254 [57:51<20:24, 18.01s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2225, 'learning_rate': 3.7200000000000004e-06, 'epoch': 0.73} + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▋ | 187/254 [58:08<19:48, 17.74s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|███████████████████████████████████████████████████████████▉ | 188/254 [58:25<19:18, 17.56s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:17,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:17,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:17,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:17,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:17,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:17,790 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2256, 'learning_rate': 3.7600000000000004e-06, 'epoch': 0.74} + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 74%|████████████████████████████████████████████████████████████▎ | 189/254 [58:41<18:32, 17.12s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▌ | 190/254 [58:57<17:42, 16.60s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▌ | 190/254 [58:57<17:42, 16.60s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▌ | 190/254 [58:57<17:42, 16.60s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:52,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:52,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:52,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:52,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:23:52,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 191/254 [59:11<16:47, 15.99s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 75%|████████████████████████████████████████████████████████████▉ | 191/254 [59:11<16:47, 15.99s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:04,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:04,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:04,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:04,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:04,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:04,959 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▏ | 192/254 [59:25<15:45, 15.25s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:16,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:16,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:16,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:22,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:22,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:22,718 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 76%|█████████████████████████████████████████████████████████████▌ | 193/254 [59:37<14:40, 14.43s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:28,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:28,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:33,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:33,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:37,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:37,215 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4079, 'learning_rate': 3.86e-06, 'epoch': 0.76} +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:41,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:41,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:45,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:47,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:47,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3026, 'learning_rate': 3.88e-06, 'epoch': 0.77} +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:51,167 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:53,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:55,693 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 196/254 [1:00:09<11:11, 11.57s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 77%|████████████████████████████████████████████████████████████▉ | 196/254 [1:00:09<11:11, 11.57s/it]g-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:24:59,048 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:01,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:03,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:05,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:05,063 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:07,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:08,892 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:10,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:10,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:14,042 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:15,562 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:17,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:17,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:19,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:21,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:23,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-01 01:25:23,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-01 01:10:20,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF +RuntimeError: CUDA out of memory. Tried to allocate 1.65 GiB (GPU 0; 15.78 GiB total capacity; 11.62 GiB already allocated; 1.65 GiB free; 12.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF \ No newline at end of file