diff --git "a/wandb/run-20220303_074415-2c9ds5of/files/output.log" "b/wandb/run-20220303_074415-2c9ds5of/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220303_074415-2c9ds5of/files/output.log" @@ -0,0 +1,1697 @@ + + + 0%| | 0/1189 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:23,008 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/1189 [00:05<1:55:02, 5.81s/it] + + 0%| | 1/1189 [00:05<1:55:02, 5.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:44:25,673 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0562, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:28,195 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 2/1189 [00:10<1:47:42, 5.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:44:30,801 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:33,357 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 3/1189 [00:16<1:47:09, 5.42s/it] + + 0%|▏ | 3/1189 [00:16<1:47:09, 5.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:44:36,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8812, 'learning_rate': 4e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:38,652 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 4/1189 [00:21<1:44:41, 5.30s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:44:41,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:43,710 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5954, 'learning_rate': 6e-06, 'epoch': 0.0} + 0%|▎ | 5/1189 [00:26<1:42:53, 5.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:44:46,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9116, 'learning_rate': 8e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:48,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 6/1189 [00:31<1:41:51, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:44:51,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:53,869 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 7/1189 [00:36<1:41:14, 5.14s/it] + + 1%|▍ | 7/1189 [00:36<1:41:14, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:44:56,493 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5554, 'learning_rate': 1.2e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 07:44:59,018 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 8/1189 [00:41<1:41:13, 5.14s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:01,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:04,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 9/1189 [00:46<1:40:33, 5.11s/it] + + 1%|▌ | 9/1189 [00:46<1:40:33, 5.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:06,641 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.479, 'learning_rate': 1.6e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:09,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 10/1189 [00:51<1:39:42, 5.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:11,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5237, 'learning_rate': 1.8e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:13,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 11/1189 [00:56<1:38:35, 5.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:16,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:18,775 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 12/1189 [01:01<1:37:17, 4.96s/it] + + 1%|▊ | 12/1189 [01:01<1:37:17, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:21,259 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5701, 'learning_rate': 2.2e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:23,677 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▊ | 13/1189 [01:06<1:36:51, 4.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:26,129 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:28,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 14/1189 [01:11<1:35:47, 4.89s/it] + + 1%|▉ | 14/1189 [01:11<1:35:47, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:30,924 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2756, 'learning_rate': 2.6e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:33,282 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 15/1189 [01:16<1:35:20, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:35,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4677, 'learning_rate': 2.8e-05, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:37,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█ | 16/1189 [01:20<1:34:04, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:40,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:42,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█▏ | 17/1189 [01:25<1:33:35, 4.79s/it] + + 1%|█▏ | 17/1189 [01:25<1:33:35, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:45,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:47,433 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▏ | 18/1189 [01:30<1:33:12, 4.78s/it] + + 2%|█▏ | 18/1189 [01:30<1:33:12, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:49,839 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:52,095 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2803, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.02} + + 2%|█▎ | 19/1189 [01:34<1:32:27, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:54,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4825, 'learning_rate': 3.6e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 07:45:56,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▎ | 20/1189 [01:39<1:32:06, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:45:59,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:01,411 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 21/1189 [01:44<1:31:23, 4.69s/it] + + 2%|█▍ | 21/1189 [01:44<1:31:23, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:03,752 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:06,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 22/1189 [01:48<1:30:50, 4.67s/it] + + 2%|█▍ | 22/1189 [01:48<1:30:50, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:08,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4523, 'learning_rate': 4.2000000000000004e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:10,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 23/1189 [01:53<1:30:09, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:12,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:15,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 24/1189 [01:58<1:30:01, 4.64s/it] + + 2%|█▌ | 24/1189 [01:58<1:30:01, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:17,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:19,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 25/1189 [02:02<1:29:26, 4.61s/it] + + 2%|█▋ | 25/1189 [02:02<1:29:26, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:22,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:24,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 26/1189 [02:07<1:28:37, 4.57s/it] + + 2%|█▋ | 26/1189 [02:07<1:28:37, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:26,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:28,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 27/1189 [02:11<1:28:06, 4.55s/it] + + 2%|█▊ | 27/1189 [02:11<1:28:06, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:31,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1246, 'learning_rate': 5.2e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:33,262 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▊ | 28/1189 [02:16<1:27:49, 4.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:35,472 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1848, 'learning_rate': 5.4e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:37,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 29/1189 [02:20<1:26:41, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:39,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:41,955 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|█▉ | 30/1189 [02:24<1:25:46, 4.44s/it] + + 3%|█▉ | 30/1189 [02:24<1:25:46, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:44,153 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4211, 'learning_rate': 5.800000000000001e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:46,187 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██ | 31/1189 [02:29<1:24:28, 4.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:48,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2275, 'learning_rate': 6e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:50,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 32/1189 [02:33<1:23:29, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:52,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3839, 'learning_rate': 6.2e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:54,672 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 33/1189 [02:37<1:23:04, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:46:56,809 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0573, 'learning_rate': 6.4e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:46:58,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▎ | 34/1189 [02:41<1:21:41, 4.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:00,826 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2317, 'learning_rate': 6.6e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:02,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▎ | 35/1189 [02:45<1:20:12, 4.17s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:04,801 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.585, 'learning_rate': 6.800000000000001e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:06,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▍ | 36/1189 [02:49<1:18:58, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:08,722 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3714, 'learning_rate': 7.000000000000001e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:10,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 37/1189 [02:53<1:17:19, 4.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:12,491 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:14,356 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2535, 'learning_rate': 7.2e-05, 'epoch': 0.03} + 3%|██▌ | 38/1189 [02:57<1:15:53, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:16,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:17,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 39/1189 [03:00<1:13:51, 3.85s/it] + + 3%|██▌ | 39/1189 [03:00<1:13:51, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:19,780 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4277, 'learning_rate': 7.6e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:21,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▋ | 40/1189 [03:04<1:11:51, 3.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:23,264 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1859, 'learning_rate': 7.8e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:24,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 41/1189 [03:07<1:09:51, 3.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:26,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:28,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 42/1189 [03:10<1:06:53, 3.50s/it] + + 4%|██▊ | 42/1189 [03:10<1:06:53, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:29,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2361, 'learning_rate': 8.2e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:31,011 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 43/1189 [03:13<1:03:46, 3.34s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:32,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2978, 'learning_rate': 8.400000000000001e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:33,723 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 44/1189 [03:16<1:00:08, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:35,040 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:36,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 45/1189 [03:19<56:13, 2.95s/it] + 4%|███ | 45/1189 [03:19<56:13, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:37,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:38,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 46/1189 [03:21<51:49, 2.72s/it] + 4%|███▏ | 46/1189 [03:21<51:49, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:39,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:40,409 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 47/1189 [03:23<47:47, 2.51s/it] + 4%|███▏ | 47/1189 [03:23<47:47, 2.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:41,367 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:42,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 48/1189 [03:24<43:17, 2.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:42,950 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7486, 'learning_rate': 9.2e-05, 'epoch': 0.04} +{'loss': 4.9082, 'learning_rate': 9.400000000000001e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:43,617 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 49/1189 [03:26<38:42, 2.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:44,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7752, 'learning_rate': 9.400000000000001e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-03 07:47:45,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 50/1189 [03:28<37:03, 1.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:48,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 51/1189 [03:33<57:40, 3.04s/it]g-point operations will not be computed-03 07:47:48,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 51/1189 [03:33<57:40, 3.04s/it]g-point operations will not be computed-03 07:47:48,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 51/1189 [03:33<57:40, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:53,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 51/1189 [03:33<57:40, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:53,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 52/1189 [03:39<1:10:26, 3.72s/it]g-point operations will not be computed-03 07:47:53,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 52/1189 [03:39<1:10:26, 3.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:58,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 52/1189 [03:39<1:10:26, 3.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:47:58,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 53/1189 [03:44<1:18:11, 4.13s/it]g-point operations will not be computed-03 07:47:58,890 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 53/1189 [03:44<1:18:11, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:03,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 54/1189 [03:49<1:22:55, 4.38s/it]g-point operations will not be computed-03 07:48:03,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 54/1189 [03:49<1:22:55, 4.38s/it]g-point operations will not be computed-03 07:48:03,891 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 54/1189 [03:49<1:22:55, 4.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:08,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 55/1189 [03:54<1:26:49, 4.59s/it]g-point operations will not be computed-03 07:48:08,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 55/1189 [03:54<1:26:49, 4.59s/it]g-point operations will not be computed-03 07:48:08,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 55/1189 [03:54<1:26:49, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:13,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 55/1189 [03:54<1:26:49, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:13,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 56/1189 [03:59<1:29:07, 4.72s/it]g-point operations will not be computed-03 07:48:13,996 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 56/1189 [03:59<1:29:07, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:19,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 57/1189 [04:04<1:30:37, 4.80s/it]g-point operations will not be computed-03 07:48:19,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 57/1189 [04:04<1:30:37, 4.80s/it]g-point operations will not be computed-03 07:48:19,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 57/1189 [04:04<1:30:37, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:24,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 57/1189 [04:04<1:30:37, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:24,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 58/1189 [04:09<1:31:30, 4.85s/it]g-point operations will not be computed-03 07:48:24,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 58/1189 [04:09<1:31:30, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:28,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 59/1189 [04:14<1:31:39, 4.87s/it]g-point operations will not be computed-03 07:48:28,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 59/1189 [04:14<1:31:39, 4.87s/it]g-point operations will not be computed-03 07:48:28,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 59/1189 [04:14<1:31:39, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:33,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 60/1189 [04:19<1:31:35, 4.87s/it]g-point operations will not be computed-03 07:48:33,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 60/1189 [04:19<1:31:35, 4.87s/it]g-point operations will not be computed-03 07:48:33,768 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 60/1189 [04:19<1:31:35, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:38,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 60/1189 [04:19<1:31:35, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:38,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 61/1189 [04:23<1:31:08, 4.85s/it]g-point operations will not be computed-03 07:48:38,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 61/1189 [04:23<1:31:08, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:43,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 62/1189 [04:28<1:30:54, 4.84s/it]g-point operations will not be computed-03 07:48:43,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 62/1189 [04:28<1:30:54, 4.84s/it]g-point operations will not be computed-03 07:48:43,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 62/1189 [04:28<1:30:54, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:48,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 63/1189 [04:33<1:30:26, 4.82s/it]g-point operations will not be computed-03 07:48:48,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 63/1189 [04:33<1:30:26, 4.82s/it]g-point operations will not be computed-03 07:48:48,165 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 63/1189 [04:33<1:30:26, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:53,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 63/1189 [04:33<1:30:26, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:53,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 64/1189 [04:38<1:30:16, 4.81s/it]g-point operations will not be computed-03 07:48:53,001 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 64/1189 [04:38<1:30:16, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:48:57,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 65/1189 [04:42<1:29:20, 4.77s/it]g-point operations will not be computed-03 07:48:57,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 65/1189 [04:42<1:29:20, 4.77s/it]g-point operations will not be computed-03 07:48:57,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 65/1189 [04:42<1:29:20, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 66/1189 [04:47<1:28:20, 4.72s/it]g-point operations will not be computed-03 07:49:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 66/1189 [04:47<1:28:20, 4.72s/it]g-point operations will not be computed-03 07:49:02,370 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 66/1189 [04:47<1:28:20, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:06,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 66/1189 [04:47<1:28:20, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:06,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 67/1189 [04:52<1:27:46, 4.69s/it]g-point operations will not be computed-03 07:49:06,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 67/1189 [04:52<1:27:46, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:11,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 68/1189 [04:56<1:27:07, 4.66s/it]g-point operations will not be computed-03 07:49:11,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 68/1189 [04:56<1:27:07, 4.66s/it]g-point operations will not be computed-03 07:49:11,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 68/1189 [04:56<1:27:07, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:16,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 69/1189 [05:01<1:26:50, 4.65s/it]g-point operations will not be computed-03 07:49:16,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 69/1189 [05:01<1:26:50, 4.65s/it]g-point operations will not be computed-03 07:49:16,222 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 69/1189 [05:01<1:26:50, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:20,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 70/1189 [05:05<1:26:03, 4.61s/it]g-point operations will not be computed-03 07:49:20,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 70/1189 [05:05<1:26:03, 4.61s/it]g-point operations will not be computed-03 07:49:20,771 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 70/1189 [05:05<1:26:03, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:25,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 70/1189 [05:05<1:26:03, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:25,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 71/1189 [05:10<1:25:33, 4.59s/it]g-point operations will not be computed-03 07:49:25,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 71/1189 [05:10<1:25:33, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:29,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 71/1189 [05:10<1:25:33, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:29,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 72/1189 [05:15<1:25:37, 4.60s/it]g-point operations will not be computed-03 07:49:29,912 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 72/1189 [05:15<1:25:37, 4.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:34,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 73/1189 [05:19<1:24:43, 4.56s/it]g-point operations will not be computed-03 07:49:34,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 73/1189 [05:19<1:24:43, 4.56s/it]g-point operations will not be computed-03 07:49:34,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 73/1189 [05:19<1:24:43, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:38,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 74/1189 [05:23<1:24:18, 4.54s/it]g-point operations will not be computed-03 07:49:38,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 74/1189 [05:23<1:24:18, 4.54s/it]g-point operations will not be computed-03 07:49:38,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 74/1189 [05:23<1:24:18, 4.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:43,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 75/1189 [05:28<1:23:54, 4.52s/it]g-point operations will not be computed-03 07:49:43,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 75/1189 [05:28<1:23:54, 4.52s/it]g-point operations will not be computed-03 07:49:43,401 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 75/1189 [05:28<1:23:54, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:47,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 76/1189 [05:32<1:22:56, 4.47s/it]g-point operations will not be computed-03 07:49:47,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 76/1189 [05:32<1:22:56, 4.47s/it]g-point operations will not be computed-03 07:49:47,811 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 76/1189 [05:32<1:22:56, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:52,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:49:54,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:49:52,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:49:54,304 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:49:52,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 77/1189 [05:37<1:22:18, 4.44s/it]g-point operations will not be computed-03 07:49:52,178 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 77/1189 [05:37<1:22:18, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:49:56,616 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 78/1189 [05:41<1:22:11, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:00,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 78/1189 [05:41<1:22:11, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:00,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 07:50:00,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 07:50:00,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 79/1189 [05:45<1:21:31, 4.41s/it]g-point operations will not be computed-03 07:50:00,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 79/1189 [05:45<1:21:31, 4.41s/it]g-point operations will not be computed-03 07:50:00,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 07:50:05,258 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 80/1189 [05:50<1:20:44, 4.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:09,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 81/1189 [05:54<1:19:52, 4.33s/it]g-point operations will not be computed-03 07:50:09,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 81/1189 [05:54<1:19:52, 4.33s/it]g-point operations will not be computed-03 07:50:09,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 81/1189 [05:54<1:19:52, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:13,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 82/1189 [05:58<1:19:48, 4.33s/it]g-point operations will not be computed-03 07:50:13,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 82/1189 [05:58<1:19:48, 4.33s/it]g-point operations will not be computed-03 07:50:13,755 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 82/1189 [05:58<1:19:48, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:18,042 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 83/1189 [06:02<1:18:50, 4.28s/it]g-point operations will not be computed-03 07:50:18,042 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 83/1189 [06:02<1:18:50, 4.28s/it]g-point operations will not be computed-03 07:50:18,042 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 83/1189 [06:02<1:18:50, 4.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:22,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 84/1189 [06:06<1:17:22, 4.20s/it]g-point operations will not be computed-03 07:50:22,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 84/1189 [06:06<1:17:22, 4.20s/it]g-point operations will not be computed-03 07:50:22,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 84/1189 [06:06<1:17:22, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:26,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 85/1189 [06:10<1:15:59, 4.13s/it]g-point operations will not be computed-03 07:50:26,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 85/1189 [06:10<1:15:59, 4.13s/it]g-point operations will not be computed-03 07:50:26,159 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 85/1189 [06:10<1:15:59, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:30,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 86/1189 [06:14<1:14:36, 4.06s/it]g-point operations will not be computed-03 07:50:30,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 86/1189 [06:14<1:14:36, 4.06s/it]g-point operations will not be computed-03 07:50:30,067 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 86/1189 [06:14<1:14:36, 4.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:33,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 87/1189 [06:18<1:12:49, 3.97s/it]g-point operations will not be computed-03 07:50:33,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 87/1189 [06:18<1:12:49, 3.97s/it]g-point operations will not be computed-03 07:50:33,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 87/1189 [06:18<1:12:49, 3.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:37,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 87/1189 [06:18<1:12:49, 3.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:37,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 88/1189 [06:22<1:10:58, 3.87s/it]g-point operations will not be computed-03 07:50:37,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 88/1189 [06:22<1:10:58, 3.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:41,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 88/1189 [06:22<1:10:58, 3.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:41,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 89/1189 [06:25<1:08:51, 3.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:44,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 89/1189 [06:25<1:08:51, 3.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:44,601 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:50:46,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 90/1189 [06:29<1:06:42, 3.64s/it] + 8%|█████▉ | 90/1189 [06:29<1:06:42, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:47,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 90/1189 [06:29<1:06:42, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:47,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 91/1189 [06:32<1:04:06, 3.50s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:47,863 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:35<1:01:37, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:50,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:35<1:01:37, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:50,973 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:35<1:01:37, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:53,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:35<1:01:37, 3.37s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:53,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 93/1189 [06:38<58:34, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:56,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 93/1189 [06:38<58:34, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:56,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 94/1189 [06:40<54:52, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:59,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 95/1189 [06:43<51:34, 2.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:59,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 95/1189 [06:43<51:34, 2.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:50:59,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 96/1189 [06:45<47:28, 2.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:01,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 96/1189 [06:45<47:28, 2.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:01,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 97/1189 [06:47<43:26, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:05,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 97/1189 [06:47<43:26, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:05,102 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6501, 'learning_rate': 0.00018800000000000002, 'epoch': 0.08} + 8%|██████▋ | 98/1189 [06:48<39:35, 2.18s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:06,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 98/1189 [06:48<39:35, 2.18s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:06,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 99/1189 [06:50<35:46, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:08,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 99/1189 [06:50<35:46, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:08,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 100/1189 [06:52<34:53, 1.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:08,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 100/1189 [06:52<34:53, 1.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:12,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▊ | 101/1189 [06:57<53:42, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:12,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▊ | 101/1189 [06:57<53:42, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:12,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▊ | 101/1189 [06:57<53:42, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:17,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▊ | 101/1189 [06:57<53:42, 2.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:17,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 102/1189 [07:02<1:05:36, 3.62s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:17,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▋ | 102/1189 [07:02<1:05:36, 3.62s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:22,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 103/1189 [07:07<1:13:35, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:22,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 103/1189 [07:07<1:13:35, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:22,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 103/1189 [07:07<1:13:35, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:27,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 103/1189 [07:07<1:13:35, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:27,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 103/1189 [07:07<1:13:35, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:27,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 104/1189 [07:12<1:18:42, 4.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:27,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 104/1189 [07:12<1:18:42, 4.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:32,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 105/1189 [07:17<1:22:30, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:32,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 105/1189 [07:17<1:22:30, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:32,407 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 105/1189 [07:17<1:22:30, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:37,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 106/1189 [07:22<1:24:47, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:37,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 106/1189 [07:22<1:24:47, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:37,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 106/1189 [07:22<1:24:47, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:42,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 106/1189 [07:22<1:24:47, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:42,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 107/1189 [07:27<1:26:05, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:42,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 107/1189 [07:27<1:26:05, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:47,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 108/1189 [07:32<1:26:38, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:47,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 108/1189 [07:32<1:26:38, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:47,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 108/1189 [07:32<1:26:38, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:52,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 108/1189 [07:32<1:26:38, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:52,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 109/1189 [07:37<1:26:58, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:52,237 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 109/1189 [07:37<1:26:58, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:57,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 110/1189 [07:42<1:27:00, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:57,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 110/1189 [07:42<1:27:00, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:51:57,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 110/1189 [07:42<1:27:00, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:01,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 111/1189 [07:47<1:26:25, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:01,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 111/1189 [07:47<1:26:25, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:01,907 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 111/1189 [07:47<1:26:25, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:06,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 112/1189 [07:51<1:26:02, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:06,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 112/1189 [07:51<1:26:02, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:06,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 112/1189 [07:51<1:26:02, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:11,361 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 113/1189 [07:56<1:25:23, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:11,361 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 113/1189 [07:56<1:25:23, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:11,361 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 113/1189 [07:56<1:25:23, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:16,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 114/1189 [08:01<1:25:21, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:16,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 114/1189 [08:01<1:25:21, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:16,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▍ | 114/1189 [08:01<1:25:21, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:20,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 115/1189 [08:06<1:25:10, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:20,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 115/1189 [08:06<1:25:10, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:20,888 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 115/1189 [08:06<1:25:10, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:25,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 116/1189 [08:10<1:24:43, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:25,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 116/1189 [08:10<1:24:43, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:25,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 116/1189 [08:10<1:24:43, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:30,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 116/1189 [08:10<1:24:43, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:30,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 117/1189 [08:15<1:24:04, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:30,227 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 117/1189 [08:15<1:24:04, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:34,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 117/1189 [08:15<1:24:04, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:34,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 118/1189 [08:19<1:23:19, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:34,862 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▋ | 118/1189 [08:19<1:23:19, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:39,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 119/1189 [08:24<1:23:00, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:39,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 119/1189 [08:24<1:23:00, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:39,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 119/1189 [08:24<1:23:00, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:44,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 119/1189 [08:24<1:23:00, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:44,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 120/1189 [08:29<1:22:30, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:44,050 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 120/1189 [08:29<1:22:30, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:48,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 120/1189 [08:29<1:22:30, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:48,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 121/1189 [08:33<1:21:59, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:48,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 121/1189 [08:33<1:21:59, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:53,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 121/1189 [08:33<1:21:59, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:53,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 122/1189 [08:38<1:21:40, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:53,166 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 122/1189 [08:38<1:21:40, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:57,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 122/1189 [08:38<1:21:40, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:57,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 123/1189 [08:42<1:20:55, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:52:57,660 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 123/1189 [08:42<1:20:55, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:02,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 124/1189 [08:47<1:20:19, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:02,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 124/1189 [08:47<1:20:19, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:02,163 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 124/1189 [08:47<1:20:19, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:06,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 125/1189 [08:51<1:19:39, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:06,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 125/1189 [08:51<1:19:39, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:06,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 125/1189 [08:51<1:19:39, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:11,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 126/1189 [08:56<1:19:22, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:11,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 126/1189 [08:56<1:19:22, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:11,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 126/1189 [08:56<1:19:22, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:15,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 127/1189 [09:00<1:18:34, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:15,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 127/1189 [09:00<1:18:34, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:15,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 127/1189 [09:00<1:18:34, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:19,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 128/1189 [09:04<1:18:00, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:19,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 128/1189 [09:04<1:18:00, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:19,741 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 128/1189 [09:04<1:18:00, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:24,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 128/1189 [09:04<1:18:00, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:24,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 128/1189 [09:04<1:18:00, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:24,092 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 129/1189 [09:09<1:17:25, 4.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:28,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 130/1189 [09:13<1:16:25, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:28,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 130/1189 [09:13<1:16:25, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:28,363 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 130/1189 [09:13<1:16:25, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:32,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 131/1189 [09:17<1:15:33, 4.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:32,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 131/1189 [09:17<1:15:33, 4.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:32,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 131/1189 [09:17<1:15:33, 4.29s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:36,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 132/1189 [09:21<1:14:36, 4.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:36,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 132/1189 [09:21<1:14:36, 4.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:36,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 132/1189 [09:21<1:14:36, 4.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:40,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 132/1189 [09:21<1:14:36, 4.24s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:40,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 133/1189 [09:25<1:14:06, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:40,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 133/1189 [09:25<1:14:06, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:44,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 133/1189 [09:25<1:14:06, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:44,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 134/1189 [09:29<1:12:55, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:44,899 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 134/1189 [09:29<1:12:55, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:48,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 134/1189 [09:29<1:12:55, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:48,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 135/1189 [09:33<1:11:37, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:48,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 135/1189 [09:33<1:11:37, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:52,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 135/1189 [09:33<1:11:37, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:52,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 136/1189 [09:37<1:10:29, 4.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:52,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 136/1189 [09:37<1:10:29, 4.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:56,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 136/1189 [09:37<1:10:29, 4.02s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:56,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|████████▉ | 137/1189 [09:41<1:09:44, 3.98s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:53:56,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|████████▉ | 137/1189 [09:41<1:09:44, 3.98s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:00,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|████████▉ | 137/1189 [09:41<1:09:44, 3.98s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:00,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 138/1189 [09:45<1:08:39, 3.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:00,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 138/1189 [09:45<1:08:39, 3.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:04,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 138/1189 [09:45<1:08:39, 3.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:04,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 139/1189 [09:48<1:07:06, 3.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:04,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 139/1189 [09:48<1:07:06, 3.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:04,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 140/1189 [09:52<1:05:13, 3.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:07,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 140/1189 [09:52<1:05:13, 3.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:11,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 140/1189 [09:52<1:05:13, 3.73s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:11,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 141/1189 [09:55<1:02:37, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:11,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 141/1189 [09:55<1:02:37, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:11,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 141/1189 [09:55<1:02:37, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:14,275 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 142/1189 [09:58<59:41, 3.42s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 143/1189 [10:01<56:17, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 143/1189 [10:01<56:17, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:17,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 144/1189 [10:04<53:08, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:19,898 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 144/1189 [10:04<53:08, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:19,898 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 145/1189 [10:06<49:46, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:22,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 145/1189 [10:06<49:46, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:22,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 146/1189 [10:08<46:10, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:24,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 146/1189 [10:08<46:10, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:24,726 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 147/1189 [10:10<42:15, 2.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:26,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 147/1189 [10:10<42:15, 2.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:26,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 148/1189 [10:12<38:39, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:28,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 148/1189 [10:12<38:39, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:28,622 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4023, 'learning_rate': 0.00029, 'epoch': 0.12} + 13%|██████████ | 149/1189 [10:13<34:43, 2.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:31,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 149/1189 [10:13<34:43, 2.00s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:31,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 150/1189 [10:15<33:40, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:31,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 150/1189 [10:15<33:40, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:35,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 151/1189 [10:21<52:06, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:35,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 151/1189 [10:21<52:06, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:35,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 151/1189 [10:21<52:06, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:40,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 151/1189 [10:21<52:06, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:40,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 152/1189 [10:26<1:03:36, 3.68s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:40,861 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 152/1189 [10:26<1:03:36, 3.68s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:46,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 153/1189 [10:31<1:11:37, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:46,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 153/1189 [10:31<1:11:37, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:46,150 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 153/1189 [10:31<1:11:37, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:51,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 154/1189 [10:36<1:17:02, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:51,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 154/1189 [10:36<1:17:02, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:51,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 154/1189 [10:36<1:17:02, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 154/1189 [10:36<1:17:02, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 154/1189 [10:36<1:17:02, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 155/1189 [10:41<1:21:02, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 155/1189 [10:41<1:21:02, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 156/1189 [10:47<1:22:54, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 156/1189 [10:47<1:22:54, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5068, 'learning_rate': 0.000306, 'epoch': 0.13} + 13%|██████████▏ | 156/1189 [10:47<1:22:54, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 157/1189 [10:52<1:23:42, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 157/1189 [10:52<1:23:42, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 158/1189 [10:57<1:24:43, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 158/1189 [10:57<1:24:43, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.077, 'learning_rate': 0.00031, 'epoch': 0.13} + 13%|██████████▍ | 159/1189 [11:02<1:25:09, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 159/1189 [11:02<1:25:09, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8097, 'learning_rate': 0.000312, 'epoch': 0.13} + 13%|██████████▍ | 160/1189 [11:07<1:24:48, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 160/1189 [11:07<1:24:48, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3888, 'learning_rate': 0.000314, 'epoch': 0.13} + 14%|██████████▌ | 161/1189 [11:11<1:24:20, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 161/1189 [11:11<1:24:20, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9162, 'learning_rate': 0.000316, 'epoch': 0.14} + 14%|██████████▌ | 161/1189 [11:11<1:24:20, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 162/1189 [11:16<1:24:00, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 162/1189 [11:16<1:24:00, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 162/1189 [11:16<1:24:00, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 163/1189 [11:21<1:23:32, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 163/1189 [11:21<1:23:32, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 163/1189 [11:21<1:23:32, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 164/1189 [11:26<1:22:47, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 164/1189 [11:26<1:22:47, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 165/1189 [11:31<1:22:23, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▊ | 165/1189 [11:31<1:22:23, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0743, 'learning_rate': 0.000324, 'epoch': 0.14} + 14%|██████████▊ | 165/1189 [11:31<1:22:23, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 166/1189 [11:35<1:21:35, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 166/1189 [11:35<1:21:35, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 166/1189 [11:35<1:21:35, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 167/1189 [11:40<1:20:50, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 167/1189 [11:40<1:20:50, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▉ | 167/1189 [11:40<1:20:50, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 168/1189 [11:45<1:19:50, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 168/1189 [11:45<1:19:50, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 168/1189 [11:45<1:19:50, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████ | 168/1189 [11:45<1:19:50, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6817, 'learning_rate': 0.00033200000000000005, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-03 07:56:11,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:56:11,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5453, 'learning_rate': 0.00033400000000000004, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-03 07:56:11,291 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 171/1189 [11:58<1:17:08, 4.55s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 171/1189 [11:58<1:17:08, 4.55s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▏ | 171/1189 [11:58<1:17:08, 4.55s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 172/1189 [12:03<1:16:45, 4.53s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 172/1189 [12:03<1:16:45, 4.53s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 173/1189 [12:07<1:16:45, 4.53s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 173/1189 [12:07<1:16:45, 4.53s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4849, 'learning_rate': 0.00034, 'epoch': 0.15} + 15%|███████████▍ | 174/1189 [12:12<1:16:42, 4.53s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 174/1189 [12:12<1:16:42, 4.53s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4156, 'learning_rate': 0.000342, 'epoch': 0.15} + 15%|███████████▍ | 175/1189 [12:16<1:16:08, 4.51s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▍ | 175/1189 [12:16<1:16:08, 4.51s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8372, 'learning_rate': 0.00034399999999999996, 'epoch': 0.15} + 15%|███████████▍ | 175/1189 [12:16<1:16:08, 4.51s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 176/1189 [12:20<1:15:29, 4.47s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 176/1189 [12:20<1:15:29, 4.47s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 176/1189 [12:20<1:15:29, 4.47s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 177/1189 [12:25<1:14:34, 4.42s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 177/1189 [12:25<1:14:34, 4.42s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▌ | 177/1189 [12:25<1:14:34, 4.42s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 178/1189 [12:29<1:14:04, 4.40s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 178/1189 [12:29<1:14:04, 4.40s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 178/1189 [12:29<1:14:04, 4.40s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 179/1189 [12:33<1:13:36, 4.37s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 179/1189 [12:33<1:13:36, 4.37s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 179/1189 [12:33<1:13:36, 4.37s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 180/1189 [12:38<1:13:10, 4.35s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 180/1189 [12:38<1:13:10, 4.35s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 181/1189 [12:42<1:12:51, 4.34s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 181/1189 [12:42<1:12:51, 4.34s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.824, 'learning_rate': 0.000356, 'epoch': 0.15} + 15%|███████████▉ | 182/1189 [12:46<1:11:58, 4.29s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 182/1189 [12:46<1:11:58, 4.29s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1254, 'learning_rate': 0.000358, 'epoch': 0.15} + 15%|████████████ | 183/1189 [12:50<1:11:14, 4.25s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 183/1189 [12:50<1:11:14, 4.25s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6313, 'learning_rate': 0.00035999999999999997, 'epoch': 0.15} + 15%|████████████ | 184/1189 [12:54<1:10:06, 4.19s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 184/1189 [12:54<1:10:06, 4.19s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8208, 'learning_rate': 0.000362, 'epoch': 0.15} + 15%|████████████ | 184/1189 [12:54<1:10:06, 4.19s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 185/1189 [12:58<1:09:13, 4.14s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 185/1189 [12:58<1:09:13, 4.14s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 185/1189 [12:58<1:09:13, 4.14s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 186/1189 [13:02<1:08:06, 4.07s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 186/1189 [13:02<1:08:06, 4.07s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 186/1189 [13:02<1:08:06, 4.07s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 187/1189 [13:06<1:06:26, 3.98s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 187/1189 [13:06<1:06:26, 3.98s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 187/1189 [13:06<1:06:26, 3.98s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 188/1189 [13:10<1:04:47, 3.88s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 188/1189 [13:10<1:04:47, 3.88s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 188/1189 [13:10<1:04:47, 3.88s/it]g-point operations will not be computed-03 07:54:56,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 189/1189 [13:13<1:03:04, 3.78s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:32,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 189/1189 [13:13<1:03:04, 3.78s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:32,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 190/1189 [13:17<1:00:32, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:32,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 190/1189 [13:17<1:00:32, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:32,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 190/1189 [13:17<1:00:32, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:32,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 191/1189 [13:20<57:55, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:38,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 191/1189 [13:20<57:55, 3.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:38,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 192/1189 [13:23<55:00, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:38,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:57:42,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:57:38,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:57:42,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:57:38,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8936, 'learning_rate': 0.00038, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-03 07:57:42,971 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:57:38,902 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████ | 194/1189 [13:28<48:55, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:46,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████ | 194/1189 [13:28<48:55, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:46,740 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████ | 195/1189 [13:30<45:45, 2.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:48,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████ | 195/1189 [13:30<45:45, 2.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:48,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 196/1189 [13:32<42:37, 2.58s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:51,029 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 196/1189 [13:32<42:37, 2.58s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:51,029 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 197/1189 [13:34<39:21, 2.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:52,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 197/1189 [13:34<39:21, 2.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:52,872 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 199/1189 [13:38<33:06, 2.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:54,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 199/1189 [13:38<33:06, 2.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:54,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 200/1189 [13:40<32:18, 1.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:55,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 200/1189 [13:40<32:18, 1.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:55,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 200/1189 [13:40<32:18, 1.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 200/1189 [13:40<32:18, 1.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 201/1189 [13:45<49:56, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 201/1189 [13:45<49:56, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 201/1189 [13:45<49:56, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 202/1189 [13:50<1:01:00, 3.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 202/1189 [13:50<1:01:00, 3.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 202/1189 [13:50<1:01:00, 3.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 203/1189 [13:55<1:07:48, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 203/1189 [13:55<1:07:48, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 203/1189 [13:55<1:07:48, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 203/1189 [13:55<1:07:48, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7526, 'learning_rate': 0.000402, 'epoch': 0.17} + 17%|█████████████▎ | 203/1189 [13:55<1:07:48, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 203/1189 [13:55<1:07:48, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 205/1189 [14:05<1:15:28, 4.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 205/1189 [14:05<1:15:28, 4.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 206/1189 [14:10<1:17:19, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 206/1189 [14:10<1:17:19, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9076, 'learning_rate': 0.00040600000000000006, 'epoch': 0.17} + 17%|█████████████▌ | 206/1189 [14:10<1:17:19, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 207/1189 [14:16<1:19:04, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 207/1189 [14:16<1:19:04, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 207/1189 [14:16<1:19:04, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 207/1189 [14:16<1:19:04, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9574, 'learning_rate': 0.00041, 'epoch': 0.17} + 17%|█████████████▌ | 207/1189 [14:16<1:19:04, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 207/1189 [14:16<1:19:04, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 209/1189 [14:26<1:20:18, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 209/1189 [14:26<1:20:18, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▋ | 209/1189 [14:26<1:20:18, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 210/1189 [14:30<1:20:06, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▊ | 210/1189 [14:30<1:20:06, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7074, 'learning_rate': 0.000416, 'epoch': 0.18} + [WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 212/1189 [14:40<1:18:35, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 212/1189 [14:40<1:18:35, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 213/1189 [14:45<1:18:18, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|█████████████▉ | 213/1189 [14:45<1:18:18, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7222, 'learning_rate': 0.00042, 'epoch': 0.18} + 18%|██████████████ | 214/1189 [14:50<1:17:55, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 214/1189 [14:50<1:17:55, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3001, 'learning_rate': 0.000422, 'epoch': 0.18} + 18%|██████████████ | 214/1189 [14:50<1:17:55, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 215/1189 [14:54<1:17:46, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 215/1189 [14:54<1:17:46, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 215/1189 [14:54<1:17:46, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 216/1189 [14:59<1:17:20, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 216/1189 [14:59<1:17:20, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 217/1189 [15:04<1:16:21, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▏ | 217/1189 [15:04<1:16:21, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0886, 'learning_rate': 0.000428, 'epoch': 0.18} + 18%|██████████████▏ | 217/1189 [15:04<1:16:21, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:57:59,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 218/1189 [15:08<1:16:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 218/1189 [15:08<1:16:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 218/1189 [15:08<1:16:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5595, 'learning_rate': 0.000432, 'epoch': 0.18} + 18%|██████████████▎ | 218/1189 [15:08<1:16:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 218/1189 [15:08<1:16:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 220/1189 [15:17<1:14:46, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 220/1189 [15:17<1:14:46, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 221/1189 [15:22<1:14:01, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▍ | 221/1189 [15:22<1:14:01, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8637, 'learning_rate': 0.000436, 'epoch': 0.19} + 19%|██████████████▌ | 222/1189 [15:26<1:13:46, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 222/1189 [15:26<1:13:46, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:59:48,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:59:48,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1594, 'learning_rate': 0.00044, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-03 07:59:48,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 07:59:48,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 224/1189 [15:35<1:13:08, 4.55s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 224/1189 [15:35<1:13:08, 4.55s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 225/1189 [15:40<1:12:04, 4.49s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 225/1189 [15:40<1:12:04, 4.49s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0385, 'learning_rate': 0.000444, 'epoch': 0.19} + 19%|██████████████▊ | 226/1189 [15:44<1:11:33, 4.46s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▊ | 226/1189 [15:44<1:11:33, 4.46s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7914, 'learning_rate': 0.000446, 'epoch': 0.19} + 19%|██████████████▉ | 227/1189 [15:49<1:10:44, 4.41s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 227/1189 [15:49<1:10:44, 4.41s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0119, 'learning_rate': 0.000448, 'epoch': 0.19} + 19%|██████████████▉ | 228/1189 [15:53<1:10:22, 4.39s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 228/1189 [15:53<1:10:22, 4.39s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3607, 'learning_rate': 0.00045000000000000004, 'epoch': 0.19} + 19%|███████████████ | 229/1189 [15:57<1:09:45, 4.36s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 229/1189 [15:57<1:09:45, 4.36s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1186, 'learning_rate': 0.00045200000000000004, 'epoch': 0.19} + 19%|███████████████ | 230/1189 [16:01<1:09:07, 4.33s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 230/1189 [16:01<1:09:07, 4.33s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9873, 'learning_rate': 0.00045400000000000003, 'epoch': 0.19} + 19%|███████████████▏ | 231/1189 [16:06<1:08:16, 4.28s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████▏ | 231/1189 [16:06<1:08:16, 4.28s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9773, 'learning_rate': 0.000456, 'epoch': 0.19} + 20%|███████████████▏ | 232/1189 [16:10<1:07:15, 4.22s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▏ | 232/1189 [16:10<1:07:15, 4.22s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8306, 'learning_rate': 0.000458, 'epoch': 0.2} + 20%|███████████████▏ | 232/1189 [16:10<1:07:15, 4.22s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▎ | 233/1189 [16:14<1:06:44, 4.19s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▎ | 233/1189 [16:14<1:06:44, 4.19s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▎ | 233/1189 [16:14<1:06:44, 4.19s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▎ | 234/1189 [16:18<1:05:25, 4.11s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▎ | 234/1189 [16:18<1:05:25, 4.11s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▎ | 234/1189 [16:18<1:05:25, 4.11s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 235/1189 [16:22<1:04:11, 4.04s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 235/1189 [16:22<1:04:11, 4.04s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 235/1189 [16:22<1:04:11, 4.04s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 236/1189 [16:25<1:03:17, 3.98s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 236/1189 [16:25<1:03:17, 3.98s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▍ | 236/1189 [16:25<1:03:17, 3.98s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 237/1189 [16:29<1:02:29, 3.94s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 237/1189 [16:29<1:02:29, 3.94s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 237/1189 [16:29<1:02:29, 3.94s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 238/1189 [16:33<1:01:45, 3.90s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 238/1189 [16:33<1:01:45, 3.90s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▌ | 238/1189 [16:33<1:01:45, 3.90s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 239/1189 [16:37<1:00:32, 3.82s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:00:57,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:00:57,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7947, 'learning_rate': 0.000474, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-03 08:00:57,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 241/1189 [16:44<57:34, 3.64s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 241/1189 [16:44<57:34, 3.64s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▏ | 241/1189 [16:44<57:34, 3.64s/it]g-point operations will not be computed-03 07:59:28,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 242/1189 [16:47<55:34, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:06,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 242/1189 [16:47<55:34, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:06,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 243/1189 [16:50<53:20, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:06,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 243/1189 [16:50<53:20, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:06,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 243/1189 [16:50<53:20, 3.38s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:06,134 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 244/1189 [16:53<50:39, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:11,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 244/1189 [16:53<50:39, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:11,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 245/1189 [16:55<47:38, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 245/1189 [16:55<47:38, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 246/1189 [16:58<44:17, 2.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 246/1189 [16:58<44:17, 2.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:17,416 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:17,416 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:19,243 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:19,243 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:20,801 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:20,801 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:22,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:22,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:01:22,623 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 251/1189 [17:10<47:08, 3.02s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 251/1189 [17:10<47:08, 3.02s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 251/1189 [17:10<47:08, 3.02s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 252/1189 [17:15<57:04, 3.66s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 252/1189 [17:15<57:04, 3.66s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 252/1189 [17:15<57:04, 3.66s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 253/1189 [17:20<1:03:13, 4.05s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 253/1189 [17:20<1:03:13, 4.05s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 254/1189 [17:25<1:07:33, 4.33s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 254/1189 [17:25<1:07:33, 4.33s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9493, 'learning_rate': 0.0005020000000000001, 'epoch': 0.21} + 21%|████████████████▋ | 254/1189 [17:25<1:07:33, 4.33s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 255/1189 [17:30<1:10:08, 4.51s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 255/1189 [17:30<1:10:08, 4.51s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 256/1189 [17:35<1:12:29, 4.66s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 256/1189 [17:35<1:12:29, 4.66s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1059, 'learning_rate': 0.000506, 'epoch': 0.22} + 22%|████████████████▊ | 256/1189 [17:35<1:12:29, 4.66s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 257/1189 [17:40<1:13:19, 4.72s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 257/1189 [17:40<1:13:19, 4.72s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 257/1189 [17:40<1:13:19, 4.72s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 258/1189 [17:45<1:13:47, 4.76s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 258/1189 [17:45<1:13:47, 4.76s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 258/1189 [17:45<1:13:47, 4.76s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 259/1189 [17:50<1:14:18, 4.79s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 259/1189 [17:50<1:14:18, 4.79s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▉ | 259/1189 [17:50<1:14:18, 4.79s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 260/1189 [17:55<1:14:32, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 260/1189 [17:55<1:14:32, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 260/1189 [17:55<1:14:32, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 261/1189 [18:00<1:14:20, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 261/1189 [18:00<1:14:20, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 261/1189 [18:00<1:14:20, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████ | 261/1189 [18:00<1:14:20, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1741, 'learning_rate': 0.000518, 'epoch': 0.22} + 22%|█████████████████ | 261/1189 [18:00<1:14:20, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|███████████��█████ | 261/1189 [18:00<1:14:20, 4.81s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 263/1189 [18:09<1:13:56, 4.79s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 263/1189 [18:09<1:13:56, 4.79s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 264/1189 [18:14<1:13:45, 4.78s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▎ | 264/1189 [18:14<1:13:45, 4.78s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4247, 'learning_rate': 0.000522, 'epoch': 0.22} + 22%|█████████████████▍ | 265/1189 [18:19<1:13:08, 4.75s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 265/1189 [18:19<1:13:08, 4.75s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0175, 'learning_rate': 0.000524, 'epoch': 0.22} + 22%|█████████████████▍ | 265/1189 [18:19<1:13:08, 4.75s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 266/1189 [18:23<1:12:39, 4.72s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 266/1189 [18:23<1:12:39, 4.72s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 266/1189 [18:23<1:12:39, 4.72s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 267/1189 [18:28<1:12:12, 4.70s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▌ | 267/1189 [18:28<1:12:12, 4.70s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 268/1189 [18:33<1:12:11, 4.70s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▌ | 268/1189 [18:33<1:12:11, 4.70s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9458, 'learning_rate': 0.0005300000000000001, 'epoch': 0.23} + 23%|█████████████████▋ | 269/1189 [18:37<1:11:32, 4.67s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 269/1189 [18:37<1:11:32, 4.67s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0877, 'learning_rate': 0.000532, 'epoch': 0.23} + 23%|█████████████████▋ | 269/1189 [18:37<1:11:32, 4.67s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 269/1189 [18:37<1:11:32, 4.67s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8471, 'learning_rate': 0.0005340000000000001, 'epoch': 0.23} + 23%|█████████████████▋ | 269/1189 [18:37<1:11:32, 4.67s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 269/1189 [18:37<1:11:32, 4.67s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 271/1189 [18:46<1:11:05, 4.65s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 271/1189 [18:46<1:11:05, 4.65s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 272/1189 [18:51<1:10:10, 4.59s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▊ | 272/1189 [18:51<1:10:10, 4.59s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7942, 'learning_rate': 0.0005380000000000001, 'epoch': 0.23} + 23%|█████████████████▉ | 273/1189 [18:55<1:08:59, 4.52s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 273/1189 [18:55<1:08:59, 4.52s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8114, 'learning_rate': 0.00054, 'epoch': 0.23} + 23%|█████████████████▉ | 274/1189 [19:00<1:08:07, 4.47s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 274/1189 [19:00<1:08:07, 4.47s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8993, 'learning_rate': 0.0005420000000000001, 'epoch': 0.23} + 23%|██████████████████ | 275/1189 [19:04<1:07:34, 4.44s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 275/1189 [19:04<1:07:34, 4.44s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8725, 'learning_rate': 0.0005440000000000001, 'epoch': 0.23} + 23%|██████████████████ | 276/1189 [19:08<1:06:57, 4.40s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 276/1189 [19:08<1:06:57, 4.40s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6347, 'learning_rate': 0.000546, 'epoch': 0.23} + 23%|██████████████████▏ | 277/1189 [19:13<1:06:22, 4.37s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 277/1189 [19:13<1:06:22, 4.37s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6725, 'learning_rate': 0.0005480000000000001, 'epoch': 0.23} + 23%|██████████████████▏ | 277/1189 [19:13<1:06:22, 4.37s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 278/1189 [19:17<1:05:35, 4.32s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 278/1189 [19:17<1:05:35, 4.32s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 279/1189 [19:21<1:04:58, 4.28s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 279/1189 [19:21<1:04:58, 4.28s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1579, 'learning_rate': 0.0005520000000000001, 'epoch': 0.23} + 24%|██████████████████▎ | 280/1189 [19:25<1:04:10, 4.24s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▎ | 280/1189 [19:25<1:04:10, 4.24s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9103, 'learning_rate': 0.000554, 'epoch': 0.24} + 24%|██████████████████▍ | 281/1189 [19:29<1:03:45, 4.21s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 281/1189 [19:29<1:03:45, 4.21s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9805, 'learning_rate': 0.0005560000000000001, 'epoch': 0.24} + 24%|██████████████████▍ | 281/1189 [19:29<1:03:45, 4.21s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 282/1189 [19:33<1:02:26, 4.13s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 282/1189 [19:33<1:02:26, 4.13s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▍ | 282/1189 [19:33<1:02:26, 4.13s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 283/1189 [19:37<1:01:34, 4.08s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 283/1189 [19:37<1:01:34, 4.08s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▌ | 283/1189 [19:37<1:01:34, 4.08s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▋ | 284/1189 [19:41<1:00:33, 4.02s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▋ | 284/1189 [19:41<1:00:33, 4.02s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▋ | 284/1189 [19:41<1:00:33, 4.02s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 285/1189 [19:45<59:35, 3.96s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 285/1189 [19:45<59:35, 3.96s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 285/1189 [19:45<59:35, 3.96s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 286/1189 [19:49<58:55, 3.92s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 286/1189 [19:49<58:55, 3.92s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▏ | 286/1189 [19:49<58:55, 3.92s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▎ | 287/1189 [19:52<57:57, 3.86s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▎ | 287/1189 [19:52<57:57, 3.86s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:13,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:13,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:13,612 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 289/1189 [19:59<55:38, 3.71s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 289/1189 [19:59<55:38, 3.71s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 289/1189 [19:59<55:38, 3.71s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 290/1189 [20:03<54:09, 3.61s/it]g-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:23,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:23,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1021, 'learning_rate': 0.000576, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:23,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:01:14,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 292/1189 [20:09<49:30, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 292/1189 [20:09<49:30, 3.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 293/1189 [20:12<46:09, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 293/1189 [20:12<46:09, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:31,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:31,563 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:33,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:33,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:35,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:35,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:37,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:37,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:39,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:39,091 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:40,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:40,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:42,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:04:42,318 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 301/1189 [20:30<43:14, 2.92s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 301/1189 [20:30<43:14, 2.92s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1204, 'learning_rate': 0.000596, 'epoch': 0.25} + 25%|████████████████████▎ | 301/1189 [20:30<43:14, 2.92s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 302/1189 [20:35<52:36, 3.56s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 302/1189 [20:35<52:36, 3.56s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 302/1189 [20:35<52:36, 3.56s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:40<59:09, 4.01s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:40<59:09, 4.01s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:40<59:09, 4.01s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:40<59:09, 4.01s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8753, 'learning_rate': 0.000602, 'epoch': 0.26} + 25%|████████████████████▍ | 303/1189 [20:40<59:09, 4.01s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:40<59:09, 4.01s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 305/1189 [20:50<1:07:02, 4.55s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 305/1189 [20:50<1:07:02, 4.55s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 305/1189 [20:50<1:07:02, 4.55s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 306/1189 [20:55<1:09:16, 4.71s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 306/1189 [20:55<1:09:16, 4.71s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 306/1189 [20:55<1:09:16, 4.71s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:00<1:10:02, 4.76s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:00<1:10:02, 4.76s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:00<1:10:02, 4.76s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:00<1:10:02, 4.76s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9019, 'learning_rate': 0.00061, 'epoch': 0.26} + 26%|████████████████████▏ | 307/1189 [21:00<1:10:02, 4.76s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:00<1:10:02, 4.76s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 309/1189 [21:10<1:10:43, 4.82s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 309/1189 [21:10<1:10:43, 4.82s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 309/1189 [21:10<1:10:43, 4.82s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 310/1189 [21:15<1:10:34, 4.82s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 310/1189 [21:15<1:10:34, 4.82s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 311/1189 [21:20<1:10:52, 4.84s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 311/1189 [21:20<1:10:52, 4.84s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8753, 'learning_rate': 0.000616, 'epoch': 0.26} + 26%|████████████████████▍ | 311/1189 [21:20<1:10:52, 4.84s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 312/1189 [21:25<1:11:03, 4.86s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 312/1189 [21:25<1:11:03, 4.86s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 313/1189 [21:30<1:10:57, 4.86s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 313/1189 [21:30<1:10:57, 4.86s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9045, 'learning_rate': 0.00062, 'epoch': 0.26} + 26%|████████████████████▌ | 313/1189 [21:30<1:10:57, 4.86s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 314/1189 [21:34<1:10:32, 4.84s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 314/1189 [21:34<1:10:32, 4.84s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 314/1189 [21:34<1:10:32, 4.84s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:39<1:09:54, 4.80s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:39<1:09:54, 4.80s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 316/1189 [21:44<1:09:39, 4.79s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▋ | 316/1189 [21:44<1:09:39, 4.79s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1032, 'learning_rate': 0.000626, 'epoch': 0.27} + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4256, 'learning_rate': 0.000628, 'epoch': 0.27} + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 318/1189 [21:53<1:08:43, 4.73s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▊ | 318/1189 [21:53<1:08:43, 4.73s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 319/1189 [21:58<1:08:32, 4.73s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 319/1189 [21:58<1:08:32, 4.73s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1324, 'learning_rate': 0.000632, 'epoch': 0.27} + 27%|████████████████████▉ | 320/1189 [22:03<1:07:57, 4.69s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|████████████████████▉ | 320/1189 [22:03<1:07:57, 4.69s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8646, 'learning_rate': 0.000634, 'epoch': 0.27} + 27%|████████████████████▉ | 320/1189 [22:03<1:07:57, 4.69s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 321/1189 [22:07<1:07:49, 4.69s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 321/1189 [22:07<1:07:49, 4.69s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 321/1189 [22:07<1:07:49, 4.69s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 322/1189 [22:12<1:07:13, 4.65s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████ | 322/1189 [22:12<1:07:13, 4.65s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 323/1189 [22:16<1:06:35, 4.61s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▏ | 323/1189 [22:16<1:06:35, 4.61s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7852, 'learning_rate': 0.00064, 'epoch': 0.27} + 27%|█████████████████████▎ | 324/1189 [22:21<1:06:05, 4.58s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 324/1189 [22:21<1:06:05, 4.58s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3777, 'learning_rate': 0.000642, 'epoch': 0.27} + 27%|█████████████████████▎ | 325/1189 [22:25<1:05:10, 4.53s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▎ | 325/1189 [22:25<1:05:10, 4.53s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7624, 'learning_rate': 0.000644, 'epoch': 0.27} + 27%|█████████████████████▍ | 326/1189 [22:30<1:04:54, 4.51s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|█████████████████████▍ | 326/1189 [22:30<1:04:54, 4.51s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4472, 'learning_rate': 0.000646, 'epoch': 0.27} + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2027, 'learning_rate': 0.000648, 'epoch': 0.27} + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1171, 'learning_rate': 0.0006500000000000001, 'epoch': 0.28} + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▌ | 329/1189 [22:43<1:03:05, 4.40s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▌ | 329/1189 [22:43<1:03:05, 4.40s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▌ | 329/1189 [22:43<1:03:05, 4.40s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▋ | 330/1189 [22:47<1:02:27, 4.36s/it]g-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:08,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:08,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0761, 'learning_rate': 0.000656, 'epoch': 0.28} +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:08,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:08,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9278, 'learning_rate': 0.0006580000000000001, 'epoch': 0.28} +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:08,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:08,905 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:04:27,960 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▊ | 333/1189 [23:00<1:00:53, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▊ | 333/1189 [23:00<1:00:53, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▉ | 334/1189 [23:04<1:00:45, 4.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▉ | 334/1189 [23:04<1:00:45, 4.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|█████████████████████▉ | 334/1189 [23:04<1:00:45, 4.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 335/1189 [23:08<59:50, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 335/1189 [23:08<59:50, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 335/1189 [23:08<59:50, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 336/1189 [23:12<58:54, 4.14s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 336/1189 [23:12<58:54, 4.14s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▌ | 336/1189 [23:12<58:54, 4.14s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 337/1189 [23:16<57:41, 4.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 337/1189 [23:16<57:41, 4.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 337/1189 [23:16<57:41, 4.06s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 338/1189 [23:20<56:13, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 338/1189 [23:20<56:13, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|██████████████████████▋ | 338/1189 [23:20<56:13, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▊ | 339/1189 [23:23<54:35, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▊ | 339/1189 [23:23<54:35, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▊ | 339/1189 [23:23<54:35, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:19,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 340/1189 [23:27<53:00, 3.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:46,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 340/1189 [23:27<53:00, 3.75s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:46,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 341/1189 [23:30<50:57, 3.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:46,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 341/1189 [23:30<50:57, 3.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:46,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|██████████████████████▉ | 341/1189 [23:30<50:57, 3.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:46,118 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 342/1189 [23:33<48:43, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:52,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 342/1189 [23:33<48:43, 3.45s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:52,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 343/1189 [23:36<46:15, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:52,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████ | 343/1189 [23:36<46:15, 3.28s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:07:52,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:56,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:07:52,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:56,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:07:52,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:07:56,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:07:52,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 345/1189 [23:41<41:04, 2.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:00,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▏ | 345/1189 [23:41<41:04, 2.92s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:00,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 346/1189 [23:43<38:10, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:02,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 346/1189 [23:43<38:10, 2.72s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:02,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 347/1189 [23:45<35:17, 2.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:04,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▎ | 347/1189 [23:45<35:17, 2.51s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:04,065 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6626, 'learning_rate': 0.00069, 'epoch': 0.29} + 29%|███████████████████████▍ | 349/1189 [23:49<29:01, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:05,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▍ | 349/1189 [23:49<29:01, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:05,785 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▌ | 350/1189 [23:51<27:47, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:07,148 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▌ | 350/1189 [23:51<27:47, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 29%|███████████████████████▌ | 350/1189 [23:51<27:47, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 351/1189 [23:56<41:41, 2.98s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 351/1189 [23:56<41:41, 2.98s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 352/1189 [24:01<50:47, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 352/1189 [24:01<50:47, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3456, 'learning_rate': 0.0006979999999999999, 'epoch': 0.3} + 30%|███████████████████████▋ | 352/1189 [24:01<50:47, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 353/1189 [24:06<56:54, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▊ | 353/1189 [24:06<56:54, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▏ | 354/1189 [24:11<1:01:05, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▏ | 354/1189 [24:11<1:01:05, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.147, 'learning_rate': 0.0007019999999999999, 'epoch': 0.3} + 30%|███████████████████████▏ | 354/1189 [24:11<1:01:05, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 355/1189 [24:16<1:03:24, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 355/1189 [24:16<1:03:24, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 355/1189 [24:16<1:03:24, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 356/1189 [24:21<1:05:16, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 356/1189 [24:21<1:05:16, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▎ | 356/1189 [24:21<1:05:16, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 357/1189 [24:26<1:06:27, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 357/1189 [24:26<1:06:27, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 357/1189 [24:26<1:06:27, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 358/1189 [24:31<1:07:07, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 358/1189 [24:31<1:07:07, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▍ | 358/1189 [24:31<1:07:07, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 359/1189 [24:36<1:07:19, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 359/1189 [24:36<1:07:19, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 359/1189 [24:36<1:07:19, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 360/1189 [24:41<1:07:31, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▌ | 360/1189 [24:41<1:07:31, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 361/1189 [24:46<1:07:16, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 361/1189 [24:46<1:07:16, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9228, 'learning_rate': 0.000716, 'epoch': 0.3} + 30%|███████████████████████▋ | 361/1189 [24:46<1:07:16, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 362/1189 [24:51<1:07:10, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 362/1189 [24:51<1:07:10, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 30%|███████████████████████▋ | 362/1189 [24:51<1:07:10, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 363/1189 [24:56<1:07:01, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 363/1189 [24:56<1:07:01, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 363/1189 [24:56<1:07:01, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▊ | 363/1189 [24:56<1:07:01, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0136, 'learning_rate': 0.000722, 'epoch': 0.31} + 31%|███████████████████████▊ | 363/1189 [24:56<1:07:01, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 365/1189 [25:05<1:06:10, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 365/1189 [25:05<1:06:10, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|███████████████████████▉ | 365/1189 [25:05<1:06:10, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 366/1189 [25:10<1:05:40, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 366/1189 [25:10<1:05:40, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 367/1189 [25:15<1:05:19, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 367/1189 [25:15<1:05:19, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0185, 'learning_rate': 0.000728, 'epoch': 0.31} + 31%|████████████████████████ | 367/1189 [25:15<1:05:19, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 367/1189 [25:15<1:05:19, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.243, 'learning_rate': 0.00073, 'epoch': 0.31} + 31%|████████████████████████ | 367/1189 [25:15<1:05:19, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████ | 367/1189 [25:15<1:05:19, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 369/1189 [25:24<1:04:18, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▏ | 369/1189 [25:24<1:04:18, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 370/1189 [25:29<1:04:01, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 370/1189 [25:29<1:04:01, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2142, 'learning_rate': 0.000734, 'epoch': 0.31} + 31%|████████████████████████▎ | 371/1189 [25:33<1:03:39, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▎ | 371/1189 [25:33<1:03:39, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3355, 'learning_rate': 0.000736, 'epoch': 0.31} + 31%|████████████████████████▍ | 372/1189 [25:38<1:02:59, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 372/1189 [25:38<1:02:59, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1462, 'learning_rate': 0.000738, 'epoch': 0.31} + 31%|████████████████████████▍ | 372/1189 [25:38<1:02:59, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 373/1189 [25:42<1:02:09, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 373/1189 [25:42<1:02:09, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▍ | 373/1189 [25:42<1:02:09, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▌ | 374/1189 [25:47<1:01:35, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▌ | 374/1189 [25:47<1:01:35, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 31%|████████████████████████▌ | 374/1189 [25:47<1:01:35, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▌ | 375/1189 [25:51<1:01:18, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▌ | 375/1189 [25:51<1:01:18, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▌ | 375/1189 [25:51<1:01:18, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 376/1189 [25:56<1:00:49, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|████████████████████████▋ | 376/1189 [25:56<1:00:49, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 377/1189 [26:00<59:59, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▎ | 377/1189 [26:00<59:59, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8618, 'learning_rate': 0.000748, 'epoch': 0.32} + 32%|█████████████████████████▍ | 378/1189 [26:04<59:19, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▍ | 378/1189 [26:04<59:19, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.868, 'learning_rate': 0.00075, 'epoch': 0.32} + 32%|█████████████████████████▌ | 379/1189 [26:08<58:53, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 379/1189 [26:08<58:53, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1074, 'learning_rate': 0.0007520000000000001, 'epoch': 0.32} + 32%|█████████████████████████▌ | 379/1189 [26:08<58:53, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 379/1189 [26:08<58:53, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1586, 'learning_rate': 0.000754, 'epoch': 0.32} + 32%|█████████████████████████▌ | 379/1189 [26:08<58:53, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▌ | 379/1189 [26:08<58:53, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:08:11,043 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|██���██████████████████████▋ | 381/1189 [26:17<57:50, 4.30s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▋ | 382/1189 [26:21<57:19, 4.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▋ | 382/1189 [26:21<57:19, 4.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0195, 'learning_rate': 0.000758, 'epoch': 0.32} + 32%|█████████████████████████▊ | 383/1189 [26:25<56:07, 4.18s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 383/1189 [26:25<56:07, 4.18s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0721, 'learning_rate': 0.00076, 'epoch': 0.32} + 32%|█████████████████████████▊ | 384/1189 [26:29<55:22, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▊ | 384/1189 [26:29<55:22, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4243, 'learning_rate': 0.000762, 'epoch': 0.32} + 32%|█████████████████████████▉ | 385/1189 [26:33<54:42, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 385/1189 [26:33<54:42, 4.08s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1279, 'learning_rate': 0.000764, 'epoch': 0.32} + 32%|█████████████████████████▉ | 386/1189 [26:37<53:37, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 32%|█████████████████████████▉ | 386/1189 [26:37<53:37, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0395, 'learning_rate': 0.0007660000000000001, 'epoch': 0.32} + 33%|██████████████████████████ | 387/1189 [26:41<52:39, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 387/1189 [26:41<52:39, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1286, 'learning_rate': 0.000768, 'epoch': 0.33} + 33%|██████████████████████████ | 388/1189 [26:44<51:20, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████ | 388/1189 [26:44<51:20, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0282, 'learning_rate': 0.0007700000000000001, 'epoch': 0.33} + 33%|██████████████████████████ | 388/1189 [26:44<51:20, 3.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:10:36,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 389/1189 [26:48<49:49, 3.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 390/1189 [26:51<48:11, 3.62s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▏ | 390/1189 [26:51<48:11, 3.62s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:11:11,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:11:11,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:11:11,956 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▍ | 392/1189 [26:57<44:10, 3.33s/it]g-point operations will not be computed-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▍ | 392/1189 [26:57<44:10, 3.33s/it]g-point operations will not be computed-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:11:17,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:11:17,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1329, 'learning_rate': 0.0007800000000000001, 'epoch': 0.33} + 33%|██████████████████████████▌ | 394/1189 [27:03<39:22, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:21,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▌ | 394/1189 [27:03<39:22, 2.97s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:21,460 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▌ | 395/1189 [27:05<37:00, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:23,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▌ | 395/1189 [27:05<37:00, 2.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:23,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 396/1189 [27:07<34:13, 2.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:25,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 396/1189 [27:07<34:13, 2.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:25,700 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 397/1189 [27:09<31:10, 2.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:27,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 33%|██████████████████████████▋ | 397/1189 [27:09<31:10, 2.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:27,453 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 399/1189 [27:12<25:24, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:28,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 399/1189 [27:12<25:24, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:28,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3745, 'learning_rate': 0.00079, 'epoch': 0.33} + 34%|██████████████████████████▉ | 400/1189 [27:14<24:47, 1.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:30,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 400/1189 [27:14<24:47, 1.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:30,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 400/1189 [27:14<24:47, 1.89s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 401/1189 [27:19<38:38, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 401/1189 [27:19<38:38, 2.94s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6052, 'learning_rate': 0.000796, 'epoch': 0.34} + 34%|███████████████████████████ | 402/1189 [27:24<47:11, 3.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 402/1189 [27:24<47:11, 3.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3085, 'learning_rate': 0.0007980000000000001, 'epoch': 0.34} + 34%|███████████████████████████ | 402/1189 [27:24<47:11, 3.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 403/1189 [27:29<53:20, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████ | 403/1189 [27:29<53:20, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 404/1189 [27:34<56:45, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 404/1189 [27:34<56:45, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3295, 'learning_rate': 0.0008020000000000001, 'epoch': 0.34} + 34%|███████████████████████████▏ | 404/1189 [27:34<56:45, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 405/1189 [27:39<59:14, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 405/1189 [27:39<59:14, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|███████████████████████████▏ | 405/1189 [27:39<59:14, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▋ | 406/1189 [27:44<1:00:47, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▋ | 406/1189 [27:44<1:00:47, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▋ | 407/1189 [27:49<1:01:49, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▋ | 407/1189 [27:49<1:01:49, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1525, 'learning_rate': 0.000808, 'epoch': 0.34} + 34%|██████████████████████████▋ | 407/1189 [27:49<1:01:49, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 408/1189 [27:54<1:02:38, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 408/1189 [27:54<1:02:38, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 408/1189 [27:54<1:02:38, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 409/1189 [27:59<1:02:59, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 409/1189 [27:59<1:02:59, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▊ | 409/1189 [27:59<1:02:59, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 410/1189 [28:04<1:03:13, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 410/1189 [28:04<1:03:13, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 34%|██████████████████████████▉ | 410/1189 [28:04<1:03:13, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|██████████████████████████▉ | 411/1189 [28:09<1:03:07, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|██████████████████████████▉ | 411/1189 [28:09<1:03:07, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████ | 412/1189 [28:14<1:02:40, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████ | 412/1189 [28:14<1:02:40, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2462, 'learning_rate': 0.0008179999999999999, 'epoch': 0.35} + 35%|███████████████████████████ | 412/1189 [28:14<1:02:40, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████ | 413/1189 [28:18<1:02:07, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████ | 413/1189 [28:18<1:02:07, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████ | 413/1189 [28:18<1:02:07, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▏ | 414/1189 [28:23<1:01:34, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▏ | 414/1189 [28:23<1:01:34, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▏ | 414/1189 [28:23<1:01:34, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▏ | 415/1189 [28:28<1:01:37, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▏ | 415/1189 [28:28<1:01:37, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▏ | 415/1189 [28:28<1:01:37, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 416/1189 [28:33<1:01:27, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 416/1189 [28:33<1:01:27, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 416/1189 [28:33<1:01:27, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 417/1189 [28:37<1:01:17, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 417/1189 [28:37<1:01:17, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▎ | 417/1189 [28:37<1:01:17, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 418/1189 [28:42<1:00:52, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 418/1189 [28:42<1:00:52, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 418/1189 [28:42<1:00:52, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 418/1189 [28:42<1:00:52, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1337, 'learning_rate': 0.000832, 'epoch': 0.35} + 35%|███████████████████████████▍ | 418/1189 [28:42<1:00:52, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|███████████████████████████▍ | 418/1189 [28:42<1:00:52, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 420/1189 [28:51<59:48, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 420/1189 [28:51<59:48, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 420/1189 [28:51<59:48, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 421/1189 [28:56<59:17, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▎ | 421/1189 [28:56<59:17, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 422/1189 [29:01<58:59, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 35%|████████████████████████████▍ | 422/1189 [29:01<58:59, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.008, 'learning_rate': 0.000838, 'epoch': 0.35} + 36%|████████████████████████████▍ | 423/1189 [29:05<58:46, 4.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▍ | 423/1189 [29:05<58:46, 4.60s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1232, 'learning_rate': 0.00084, 'epoch': 0.36} + 36%|████████████████████████████▌ | 424/1189 [29:10<58:13, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▌ | 424/1189 [29:10<58:13, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0953, 'learning_rate': 0.000842, 'epoch': 0.36} + 36%|████████████████████████████▌ | 425/1189 [29:14<57:37, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|███████████████████��████████▌ | 425/1189 [29:14<57:37, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5967, 'learning_rate': 0.000844, 'epoch': 0.36} + 36%|████████████████████████████▌ | 425/1189 [29:14<57:37, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 426/1189 [29:18<56:56, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 426/1189 [29:18<56:56, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 426/1189 [29:18<56:56, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 427/1189 [29:23<56:25, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 427/1189 [29:23<56:25, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▋ | 427/1189 [29:23<56:25, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 428/1189 [29:27<55:52, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 428/1189 [29:27<55:52, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 428/1189 [29:27<55:52, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 429/1189 [29:31<55:36, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▊ | 429/1189 [29:31<55:36, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 430/1189 [29:36<55:10, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 430/1189 [29:36<55:10, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.401, 'learning_rate': 0.000854, 'epoch': 0.36} + 36%|████████████████████████████▉ | 431/1189 [29:40<54:56, 4.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|████████████████████████████▉ | 431/1189 [29:40<54:56, 4.35s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1208, 'learning_rate': 0.000856, 'epoch': 0.36} + 36%|█████████████████████████████ | 432/1189 [29:44<54:19, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████ | 432/1189 [29:44<54:19, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0253, 'learning_rate': 0.000858, 'epoch': 0.36} + 36%|█████████████████████████████▏ | 433/1189 [29:48<53:45, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 36%|█████████████████████████████▏ | 433/1189 [29:48<53:45, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2245, 'learning_rate': 0.00086, 'epoch': 0.36} + 37%|█████████████████████████████▏ | 434/1189 [29:52<52:47, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▏ | 434/1189 [29:52<52:47, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0729, 'learning_rate': 0.000862, 'epoch': 0.36} + 37%|█████████████████████████████▎ | 435/1189 [29:56<52:09, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 435/1189 [29:56<52:09, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2137, 'learning_rate': 0.000864, 'epoch': 0.37} + 37%|█████████████████████████████▎ | 435/1189 [29:56<52:09, 4.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 436/1189 [30:00<50:49, 4.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 436/1189 [30:00<50:49, 4.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▎ | 436/1189 [30:00<50:49, 4.05s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▍ | 437/1189 [30:04<49:30, 3.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▍ | 437/1189 [30:04<49:30, 3.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▍ | 437/1189 [30:04<49:30, 3.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▍ | 438/1189 [30:08<48:26, 3.87s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3774, 'learning_rate': 0.000872, 'epoch': 0.37} +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:28,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 440/1189 [30:15<45:38, 3.66s/it]g-point operations will not be computed-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 440/1189 [30:15<45:38, 3.66s/it]g-point operations will not be computed-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▌ | 440/1189 [30:15<45:38, 3.66s/it]g-point operations will not be computed-03 08:11:34,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 441/1189 [30:18<44:07, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:37,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 442/1189 [30:21<42:21, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:37,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▋ | 442/1189 [30:21<42:21, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:37,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2605, 'learning_rate': 0.000878, 'epoch': 0.37} + 37%|█████████████████████████████▋ | 442/1189 [30:21<42:21, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:37,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 443/1189 [30:24<40:30, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:42,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 443/1189 [30:24<40:30, 3.26s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:42,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▊ | 444/1189 [30:27<38:22, 3.09s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 445/1189 [30:29<35:40, 2.88s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 37%|█████████████████████████████▉ | 445/1189 [30:29<35:40, 2.88s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:48,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:48,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:50,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:50,720 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0061, 'learning_rate': 0.000888, 'epoch': 0.38} +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:52,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:52,435 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:53,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:53,923 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:55,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:14:55,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 451/1189 [30:44<36:55, 3.00s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 451/1189 [30:44<36:55, 3.00s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7893, 'learning_rate': 0.000896, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 451/1189 [30:44<36:55, 3.00s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 452/1189 [30:49<44:49, 3.65s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 452/1189 [30:49<44:49, 3.65s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 453/1189 [30:54<50:16, 4.10s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▍ | 453/1189 [30:54<50:16, 4.10s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2482, 'learning_rate': 0.0009000000000000001, 'epoch': 0.38} + 38%|██████████████████████████████▍ | 453/1189 [30:54<50:16, 4.10s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 454/1189 [30:59<53:40, 4.38s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 454/1189 [30:59<53:40, 4.38s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 455/1189 [31:04<55:47, 4.56s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▌ | 455/1189 [31:04<55:47, 4.56s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9311, 'learning_rate': 0.0009040000000000001, 'epoch': 0.38} + 38%|██████████████████████████████▌ | 455/1189 [31:04<55:47, 4.56s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 456/1189 [31:09<57:12, 4.68s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 456/1189 [31:09<57:12, 4.68s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 457/1189 [31:14<57:54, 4.75s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▋ | 457/1189 [31:14<57:54, 4.75s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4916, 'learning_rate': 0.0009080000000000001, 'epoch': 0.38} + 39%|██████████████████████████████▊ | 458/1189 [31:19<58:13, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▊ | 458/1189 [31:19<58:13, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1491, 'learning_rate': 0.00091, 'epoch': 0.39} + 39%|██████████████████████████████▊ | 458/1189 [31:19<58:13, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▊ | 458/1189 [31:19<58:13, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3559, 'learning_rate': 0.000912, 'epoch': 0.39} + 39%|██████████████████████████████▊ | 458/1189 [31:19<58:13, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▉ | 460/1189 [31:28<58:23, 4.81s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|██████████████████████████████▉ | 460/1189 [31:28<58:23, 4.81s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4161, 'learning_rate': 0.0009140000000000001, 'epoch': 0.39} + 39%|███████████████████████████████ | 461/1189 [31:33<58:12, 4.80s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████ | 461/1189 [31:33<58:12, 4.80s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4883, 'learning_rate': 0.000916, 'epoch': 0.39} + 39%|███████████████████████████████ | 462/1189 [31:38<57:58, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████ | 462/1189 [31:38<57:58, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2702, 'learning_rate': 0.0009180000000000001, 'epoch': 0.39} + 39%|███████████████████████████████▏ | 463/1189 [31:43<57:52, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▏ | 463/1189 [31:43<57:52, 4.78s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2641, 'learning_rate': 0.00092, 'epoch': 0.39} + 39%|███████████████████████████████▏ | 464/1189 [31:47<57:35, 4.77s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▏ | 464/1189 [31:47<57:35, 4.77s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9715, 'learning_rate': 0.0009220000000000001, 'epoch': 0.39} +[WARNING|modeling_utils.py:388] 2022-03-03 08:16:09,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:16:09,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:16:09,712 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3146, 'learning_rate': 0.000924, 'epoch': 0.39} + 39%|███████████████████████████████▎ | 466/1189 [31:57<57:09, 4.74s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▎ | 466/1189 [31:57<57:09, 4.74s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5785, 'learning_rate': 0.0009260000000000001, 'epoch': 0.39} + 39%|███████████████████████████████▍ | 467/1189 [32:01<57:00, 4.74s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▍ | 467/1189 [32:01<57:00, 4.74s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0883, 'learning_rate': 0.0009280000000000001, 'epoch': 0.39} + 39%|███████████████████████████████▍ | 468/1189 [32:06<56:35, 4.71s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▍ | 468/1189 [32:06<56:35, 4.71s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1286, 'learning_rate': 0.00093, 'epoch': 0.39} + 39%|███████████████████████████████▌ | 469/1189 [32:11<56:18, 4.69s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 39%|███████████████████████████████▌ | 469/1189 [32:11<56:18, 4.69s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0579, 'learning_rate': 0.0009320000000000001, 'epoch': 0.39} + 40%|███████████████████████████████▌ | 470/1189 [32:15<55:49, 4.66s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▌ | 470/1189 [32:15<55:49, 4.66s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3605, 'learning_rate': 0.000934, 'epoch': 0.4} + 40%|███████████████████████████████▋ | 471/1189 [32:20<55:32, 4.64s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 471/1189 [32:20<55:32, 4.64s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1274, 'learning_rate': 0.0009360000000000001, 'epoch': 0.4} + 40%|███████████████████████████████▋ | 471/1189 [32:20<55:32, 4.64s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▋ | 471/1189 [32:20<55:32, 4.64s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2791, 'learning_rate': 0.0009379999999999999, 'epoch': 0.4} + 40%|███████████████████████████████▋ | 471/1189 [32:20<55:32, 4.64s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 473/1189 [32:29<55:13, 4.63s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 473/1189 [32:29<55:13, 4.63s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▊ | 473/1189 [32:29<55:13, 4.63s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 474/1189 [32:34<54:59, 4.61s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 474/1189 [32:34<54:59, 4.61s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 474/1189 [32:34<54:59, 4.61s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 475/1189 [32:38<54:09, 4.55s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 475/1189 [32:38<54:09, 4.55s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|███████████████████████████████▉ | 475/1189 [32:38<54:09, 4.55s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|█████████████████████████��██████ | 476/1189 [32:43<53:33, 4.51s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 476/1189 [32:43<53:33, 4.51s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 476/1189 [32:43<53:33, 4.51s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 477/1189 [32:47<52:52, 4.46s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 477/1189 [32:47<52:52, 4.46s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████ | 477/1189 [32:47<52:52, 4.46s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 478/1189 [32:51<51:53, 4.38s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 478/1189 [32:51<51:53, 4.38s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 478/1189 [32:51<51:53, 4.38s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 479/1189 [32:55<51:34, 4.36s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 479/1189 [32:55<51:34, 4.36s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▏ | 479/1189 [32:55<51:34, 4.36s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 480/1189 [33:00<50:54, 4.31s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 480/1189 [33:00<50:54, 4.31s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 480/1189 [33:00<50:54, 4.31s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|█���██████████████████████████████▎ | 481/1189 [33:04<50:28, 4.28s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 481/1189 [33:04<50:28, 4.28s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 40%|████████████████████████████████▎ | 481/1189 [33:04<50:28, 4.28s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 482/1189 [33:08<50:05, 4.25s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 482/1189 [33:08<50:05, 4.25s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 482/1189 [33:08<50:05, 4.25s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 483/1189 [33:12<49:26, 4.20s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 483/1189 [33:12<49:26, 4.20s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▍ | 483/1189 [33:12<49:26, 4.20s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 484/1189 [33:16<48:30, 4.13s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 484/1189 [33:16<48:30, 4.13s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▌ | 484/1189 [33:16<48:30, 4.13s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 485/1189 [33:20<47:55, 4.08s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 485/1189 [33:20<47:55, 4.08s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 485/1189 [33:20<47:55, 4.08s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 486/1189 [33:24<47:24, 4.05s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 486/1189 [33:24<47:24, 4.05s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▋ | 486/1189 [33:24<47:24, 4.05s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 487/1189 [33:28<46:35, 3.98s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 487/1189 [33:28<46:35, 3.98s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 487/1189 [33:28<46:35, 3.98s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 488/1189 [33:32<45:45, 3.92s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 488/1189 [33:32<45:45, 3.92s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▊ | 488/1189 [33:32<45:45, 3.92s/it]g-point operations will not be computed-03 08:14:45,479 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 489/1189 [33:35<44:37, 3.83s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 490/1189 [33:39<43:13, 3.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|████████████████████████████████▉ | 490/1189 [33:39<43:13, 3.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9231, 'learning_rate': 0.000974, 'epoch': 0.41} + 41%|████████████████████████████████▉ | 490/1189 [33:39<43:13, 3.71s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████ | 491/1189 [33:42<41:49, 3.59s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:18:02,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:18:02,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-03 08:18:02,791 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 493/1189 [33:48<38:31, 3.32s/it]g-point operations will not be computed-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 41%|█████████████████████████████████▏ | 493/1189 [33:48<38:31, 3.32s/it]g-point operations will not be computed-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6218, 'learning_rate': 0.00098, 'epoch': 0.41} + 41%|█████████████████████████████████▏ | 493/1189 [33:48<38:31, 3.32s/it]g-point operations will not be computed-03 08:17:54,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▏ | 494/1189 [33:51<36:30, 3.15s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:09,821 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 495/1189 [33:53<34:04, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 495/1189 [33:53<34:04, 2.95s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:12,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 496/1189 [33:56<31:38, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:14,260 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▎ | 496/1189 [33:56<31:38, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:14,260 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▍ | 497/1189 [33:57<28:39, 2.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:16,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▍ | 497/1189 [33:57<28:39, 2.49s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:16,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 499/1189 [34:01<23:17, 2.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:17,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 42%|█████████████████████████████████▌ | 499/1189 [34:01<23:17, 2.03s/it][WARNING|modeling_utils.py:388] 2022-03-03 08:18:17,636 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4343, 'learning_rate': 0.00099, 'epoch': 0.42} +[INFO|trainer.py:2366] 2022-03-03 08:18:20,334 >> Num examples = 2642 | 500/1189 [34:03<22:37, 1.97s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|trainer.py:2366] 2022-03-03 08:18:20,334 >> Num examples = 2642 | 500/1189 [34:03<22:37, 1.97s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +{'loss': 5.4469, 'learning_rate': 0.000994, 'epoch': 0.42} +[INFO|trainer.py:2366] 2022-03-03 08:18:20,334 >> Num examples = 2642 | 500/1189 [34:03<22:37, 1.97s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|trainer.py:2366] 2022-03-03 08:18:20,334 >> Num examples = 2642 | 500/1189 [34:03<22:37, 1.97s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|█▌ | 4/221 [00:08<08:46, 2.43s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 2%|█▉ | 5/221 [00:11<09:29, 2.64s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 3%|██▎ | 6/221 [00:14<10:10, 2.84s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 3%|██▋ | 7/221 [00:18<11:09, 3.13s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 4%|███ | 8/221 [00:21<10:54, 3.07s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 4%|███▍ | 9/221 [00:24<10:44, 3.04s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|███▋ | 10/221 [00:28<11:42, 3.33s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|████ | 11/221 [00:32<12:37, 3.61s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 5%|████▍ | 12/221 [00:35<11:46, 3.38s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 6%|████▊ | 13/221 [00:38<11:25, 3.30s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 6%|█████▏ | 14/221 [00:42<11:31, 3.34s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 7%|█████▌ | 15/221 [00:47<12:57, 3.78s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 7%|█████▉ | 16/221 [00:51<13:51, 4.06s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 8%|██████▎ | 17/221 [00:55<13:12, 3.88s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 8%|██████▋ | 18/221 [00:58<12:58, 3.84s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 9%|███████ | 19/221 [01:02<12:18, 3.66s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 9%|███████▍ | 20/221 [01:05<11:44, 3.50s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 10%|███████▊ | 21/221 [01:08<11:02, 3.31s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 10%|████████▏ | 22/221 [01:11<10:52, 3.28s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 10%|███████��▌ | 23/221 [01:14<10:34, 3.20s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 11%|████████▉ | 24/221 [01:18<11:07, 3.39s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 11%|█████████▎ | 25/221 [01:22<11:41, 3.58s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 12%|█████████▋ | 26/221 [01:26<11:52, 3.65s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 12%|██████████ | 27/221 [01:28<10:52, 3.37s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 13%|██████████▍ | 28/221 [01:32<11:22, 3.54s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 13%|██████████▊ | 29/221 [01:37<12:06, 3.78s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 14%|███████████▏ | 30/221 [01:40<11:19, 3.56s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 14%|███████████▌ | 31/221 [01:42<10:19, 3.26s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 14%|███████████▊ | 32/221 [01:45<10:14, 3.25s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 15%|████████████▏ | 33/221 [01:49<10:43, 3.42s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 15%|████████████▌ | 34/221 [01:53<10:46, 3.46s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 16%|████████████▉ | 35/221 [01:56<10:22, 3.35s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 16%|█████████████▎ | 36/221 [01:59<10:10, 3.30s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 17%|█████████████▋ | 37/221 [02:03<11:02, 3.60s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 17%|██████████████ | 38/221 [02:06<10:26, 3.42s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 18%|██████████████▍ | 39/221 [02:10<10:43, 3.54s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 18%|██████████████▊ | 40/221 [02:13<10:05, 3.34s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 19%|███████████████▏ | 41/221 [02:17<10:14, 3.42s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 19%|███████████████▌ | 42/221 [02:21<11:10, 3.75s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 19%|███████████████▉ | 43/221 [02:24<10:39, 3.59s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 20%|████████████████▎ | 44/221 [02:29<11:39, 3.95s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 20%|████████████████▋ | 45/221 [02:34<12:09, 4.15s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 21%|█████████████████ | 46/221 [02:38<12:04, 4.14s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 21%|█████████████████▍ | 47/221 [02:42<11:48, 4.07s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 22%|█████████████████▊ | 48/221 [02:46<11:43, 4.06s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 22%|██████████████████▏ | 49/221 [02:50<11:20, 3.95s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 23%|██████████████████▌ | 50/221 [02:54<11:21, 3.98s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 23%|██████████████████▉ | 51/221 [02:57<10:36, 3.74s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 24%|███████████████████▎ | 52/221 [03:00<09:56, 3.53s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 24%|███████████████████▋ | 53/221 [03:03<09:23, 3.36s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 24%|████████████████████ | 54/221 [03:06<09:37, 3.46s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 25%|████████████████████▍ | 55/221 [03:10<09:40, 3.50s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 25%|████████████████████▊ | 56/221 [03:15<10:27, 3.80s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 26%|█████████████████████▏ | 57/221 [03:19<10:29, 3.84s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 26%|█████████████████████▌ | 58/221 [03:22<10:05, 3.72s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 27%|█████████████████████▉ | 59/221 [03:25<09:35, 3.55s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 27%|██████████████████████▎ | 60/221 [03:28<08:48, 3.28s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 28%|██████████████████████▋ | 61/221 [03:31<08:59, 3.37s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 28%|███████████████████████ | 62/221 [03:34<08:45, 3.30s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 29%|███████████████████████▍ | 63/221 [03:38<08:48, 3.34s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 29%|███████████████████████▋ | 64/221 [03:41<08:45, 3.34s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 29%|████████████████████████ | 65/221 [03:45<08:38, 3.32s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 30%|████████████████████████▍ | 66/221 [03:48<08:46, 3.39s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 30%|████████████████████████▊ | 67/221 [03:51<08:12, 3.20s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 31%|█████████████████████████▏ | 68/221 [03:55<08:50, 3.47s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 31%|█████████████████████████▌ | 69/221 [03:58<08:29, 3.35s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 32%|█████████████████████████▉ | 70/221 [04:01<08:24, 3.34s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 32%|██████████████████████████▎ | 71/221 [04:05<08:13, 3.29s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 33%|██████████████████████████▋ | 72/221 [04:07<07:45, 3.13s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 33%|███████████████████████████ | 73/221 [04:11<08:01, 3.25s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 33%|███████████████████████████▍ | 74/221 [04:14<07:54, 3.23s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 34%|███████████████████████████▊ | 75/221 [04:17<07:48, 3.21s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 34%|████████████████████████████▏ | 76/221 [04:20<07:42, 3.19s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 35%|���███████████████████████████▌ | 77/221 [04:24<07:40, 3.20s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 35%|████████████████████████████▉ | 78/221 [04:27<07:49, 3.28s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 36%|█████████████████████████████▎ | 79/221 [04:30<07:34, 3.20s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 36%|█████████████████████████████▋ | 80/221 [04:33<07:31, 3.20s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 37%|██████████████████████████████ | 81/221 [04:37<07:53, 3.38s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 37%|██████████████████████████████▍ | 82/221 [04:41<08:24, 3.63s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 38%|██████████████████████████████▊ | 83/221 [04:46<08:51, 3.85s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 38%|███████████████████████████████▏ | 84/221 [04:50<08:53, 3.89s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 38%|███████████████████████████████▌ | 85/221 [04:54<09:14, 4.08s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 39%|███████████████████████████████▉ | 86/221 [04:58<08:57, 3.98s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 39%|████████████████████████████████▎ | 87/221 [05:02<09:13, 4.13s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 40%|████████████████████████████████▋ | 88/221 [05:06<08:38, 3.90s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 40%|█████████████████████████████████ | 89/221 [05:09<08:08, 3.70s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 41%|█████████████████████████████████▍ | 90/221 [05:13<08:04, 3.70s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 41%|█████████████████████████████████▊ | 91/221 [05:17<08:18, 3.83s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 42%|██████████████████████████████████▏ | 92/221 [05:21<08:32, 3.98s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 42%|██████████████████████████████████▌ | 93/221 [05:25<08:39, 4.06s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 43%|██████████████████████████████████▉ | 94/221 [05:29<08:25, 3.98s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 43%|███████████████████████████████████▏ | 95/221 [05:33<08:25, 4.01s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 43%|███████████████████████████████████▌ | 96/221 [05:37<08:13, 3.95s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 44%|███████████████████████████████████▉ | 97/221 [05:41<08:26, 4.08s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 44%|████████████████████████████████████▎ | 98/221 [05:45<08:13, 4.01s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 45%|████████████████████████████████████▋ | 99/221 [05:48<07:28, 3.67s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 45%|████████████████████████████████████▋ | 100/221 [05:52<07:23, 3.66s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 46%|█████████████████████████████████████ | 101/221 [05:55<07:00, 3.51s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 46%|█████████████████████████████████████▍ | 102/221 [05:58<06:37, 3.34s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 47%|█████████████████████████████████████▊ | 103/221 [06:02<06:52, 3.50s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 47%|██████████████████████████████████████ | 104/221 [06:06<07:04, 3.63s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 48%|██████████████████████████████████████▍ | 105/221 [06:10<07:27, 3.86s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 48%|██████████████████████████████████████▊ | 106/221 [06:14<07:29, 3.91s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 48%|███████████████████████████████████████▏ | 107/221 [06:17<06:56, 3.65s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 49%|███████████████████████████████████████▌ | 108/221 [06:21<07:13, 3.84s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 49%|███████████████████████████████████████▉ | 109/221 [06:25<07:15, 3.89s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 50%|████████████████████████████████████████▎ | 110/221 [06:29<06:53, 3.72s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 50%|████████████████████████████████████████▋ | 111/221 [06:32<06:36, 3.60s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 51%|█████████████████████████████████████████ | 112/221 [06:36<06:40, 3.67s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 51%|█████████████████████████████████████████▍ | 113/221 [06:40<06:37, 3.68s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 52%|█████████████████████████████████████████▊ | 114/221 [06:43<06:23, 3.58s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 52%|██████████████████████████████████████████▏ | 115/221 [06:46<06:17, 3.56s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 52%|██████████████████████████████████████████▌ | 116/221 [06:50<06:03, 3.46s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 53%|██████████████████████████████████████████▉ | 117/221 [06:53<05:56, 3.43s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 53%|███████████████████████████████████████████▏ | 118/221 [06:57<06:02, 3.52s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 54%|███████████████████████████████████████████▌ | 119/221 [07:01<06:23, 3.76s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 54%|███████████████████████████████████████████▉ | 120/221 [07:05<06:37, 3.93s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 55%|████████████████████████████████████████████▎ | 121/221 [07:09<06:23, 3.83s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 55%|████████████████████████████████████████████▋ | 122/221 [07:12<05:40, 3.43s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 56%|█████████████████████████████████████████████ | 123/221 [07:14<05:04, 3.11s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 56%|█████████████████████████████████████████████▍ | 124/221 [07:17<05:00, 3.10s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 57%|█████████████████████████████████████████████▊ | 125/221 [07:21<05:20, 3.34s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 57%|██████████████████████████████████████████████▏ | 126/221 [07:24<05:01, 3.17s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 57%|██████████████████████████████████████████████▌ | 127/221 [07:26<04:47, 3.06s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 58%|██████████████████████████████████████████████▉ | 128/221 [07:29<04:30, 2.90s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 58%|███████████████████████████████████████████████▎ | 129/221 [07:33<04:48, 3.13s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 59%|███████████████████████████████████████████████▋ | 130/221 [07:35<04:31, 2.99s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 59%|████████████████████████████████████████████████ | 131/221 [07:39<04:46, 3.18s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 60%|████████████████████████████████████████████████▍ | 132/221 [07:42<04:28, 3.01s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 60%|████████████████████████████████████████████████▋ | 133/221 [07:45<04:28, 3.05s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 61%|█████████████████████████████████████████████████ | 134/221 [07:47<04:16, 2.95s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 61%|█████████████████████████████████████████████████▍ | 135/221 [07:51<04:20, 3.03s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 62%|█████████████████████████████████████████████████▊ | 136/221 [07:54<04:36, 3.26s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 62%|██████████████████████████████████████████████████▏ | 137/221 [07:58<04:38, 3.32s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 62%|██████████████████████████████████████████████████▌ | 138/221 [08:02<04:47, 3.46s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 63%|██████████████████████████████████████████████████▉ | 139/221 [08:05<04:50, 3.54s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 63%|███████████████████████████████████████████████████▎ | 140/221 [08:08<04:20, 3.22s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 64%|███████████████████████████████████████████████████▋ | 141/221 [08:11<04:17, 3.22s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 64%|████████████████████████████████████████████████████ | 142/221 [08:14<04:10, 3.17s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 65%|████████████████████████████████████████████████████▍ | 143/221 [08:17<03:49, 2.94s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 65%|████████████████████████████████████████████████████▊ | 144/221 [08:20<04:07, 3.21s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 66%|█████████████████████████████████████████████████████▏ | 145/221 [08:23<04:01, 3.18s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 66%|█���███████████████████████████████████████████████████▌ | 146/221 [08:27<04:10, 3.35s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 67%|█████████████████████████████████████████████████████▉ | 147/221 [08:30<03:55, 3.19s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 67%|██████████████████████████████████████████████████████▏ | 148/221 [08:33<03:53, 3.20s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 67%|██████████████████████████████████████████████████████▌ | 149/221 [08:37<03:51, 3.22s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 68%|██████████████████████████████████████████████████████▉ | 150/221 [08:40<03:47, 3.21s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 68%|███████████████████████████████████████████████████████▎ | 151/221 [08:43<03:56, 3.38s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 69%|███████████████████████████████████████████████████████▋ | 152/221 [08:47<03:49, 3.32s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 69%|████████████████████████████████████████████████████████ | 153/221 [08:50<03:43, 3.28s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 70%|████████████████████████████████████████████████████████▍ | 154/221 [08:53<03:44, 3.35s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 70%|████████████████████████████████████████████████████████▊ | 155/221 [08:57<03:43, 3.38s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 71%|█████████████████████████████████████████████████████████▏ | 156/221 [09:00<03:42, 3.42s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 71%|█████████████████████████████████████████████████████████▌ | 157/221 [09:03<03:25, 3.21s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 71%|█████████████████████████████████████████████████████████▉ | 158/221 [09:08<03:48, 3.63s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 72%|██████████████████████████████████████████████████████████▎ | 159/221 [09:11<03:46, 3.65s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 72%|██████████████████████████████████████████████████████████▋ | 160/221 [09:15<03:51, 3.79s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 73%|███████████████████████████████████████████████████████████ | 161/221 [09:20<03:55, 3.92s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 73%|███████████████████████████████████████████████████████████▍ | 162/221 [09:24<03:51, 3.92s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 74%|███████████████████████████████████████████████████████████▋ | 163/221 [09:28<03:55, 4.05s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 74%|████████████████████████████████████████████████████████████ | 164/221 [09:33<04:00, 4.23s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 75%|████████████████████████████████████████████████████████████▍ | 165/221 [09:36<03:45, 4.03s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 75%|████████████████████████████████████████████████████████████▊ | 166/221 [09:39<03:26, 3.75s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 76%|█████████████████████████████████████████████████████████████▏ | 167/221 [09:43<03:19, 3.69s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 76%|█████████████████████████████████████████████████████████████▌ | 168/221 [09:46<03:05, 3.49s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 76%|█████████████████████████████████████████████████████████████▉ | 169/221 [09:50<03:06, 3.58s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 77%|██████████████████████████████████████████████████████████████▎ | 170/221 [09:53<03:06, 3.66s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 77%|██████████████████████████████████████████████████████████████▋ | 171/221 [09:57<03:03, 3.67s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 78%|███████████████████████████████████████████████████████████████ | 172/221 [10:00<02:54, 3.56s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 78%|███████████████████████████████████████████████████████████████▍ | 173/221 [10:04<02:49, 3.52s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 79%|███████████████████████████████████████████████████████████████▊ | 174/221 [10:07<02:39, 3.40s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 79%|████████████████████████████████████████████████████████████████▏ | 175/221 [10:10<02:32, 3.31s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 80%|████████████████████████████████████████████████████████████████▌ | 176/221 [10:14<02:35, 3.46s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 80%|████████████████████████████████████████████████████████████████▊ | 177/221 [10:17<02:26, 3.32s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 81%|█████████████████████████████████████████████████████████████████▏ | 178/221 [10:21<02:29, 3.47s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 81%|█████████████████████████████████████████████████████████████████▌ | 179/221 [10:24<02:23, 3.41s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 81%|█████████████████████████████████████████████████████████████████▉ | 180/221 [10:28<02:29, 3.64s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 82%|██████████████████████████████████████████████████████████████████▎ | 181/221 [10:32<02:28, 3.72s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 82%|██████████████████████████████████████████████████████████████████▋ | 182/221 [10:36<02:24, 3.70s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 83%|███████████████████████████████████████████████████████████████████ | 183/221 [10:40<02:27, 3.88s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 83%|███████████████████████████████████████████████████████████████████▍ | 184/221 [10:44<02:22, 3.85s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 84%|███████████████████████████████████████████████████████████████████▊ | 185/221 [10:47<02:12, 3.68s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 84%|████████████████████████████████████████████████████████████████████▏ | 186/221 [10:52<02:16, 3.90s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 85%|████████████████████████████████████████████████████████████████████▌ | 187/221 [10:55<02:08, 3.77s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 85%|████████████████████████████████████████████████████████████████████▉ | 188/221 [10:59<02:07, 3.87s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 86%|█████████████████████████████████████████████████████████████████████▎ | 189/221 [11:03<02:06, 3.95s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 86%|█████████████████████████████████████████████████████████████████████▋ | 190/221 [11:08<02:06, 4.09s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 86%|██████████████████████████████████████████████████████████████████████ | 191/221 [11:12<02:07, 4.24s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 87%|██████████████████████████████████████████████████████████████████████▎ | 192/221 [11:17<02:03, 4.24s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 87%|██████████████████████████████████████████████████████████████████████▋ | 193/221 [11:20<01:49, 3.93s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 88%|███████████████████████████████████████████████████████████████████████ | 194/221 [11:23<01:39, 3.69s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 88%|███████████████████████████████████████████████████████████████████████▍ | 195/221 [11:26<01:30, 3.48s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 89%|███████████████████████████████████████████████████████████████████████▊ | 196/221 [11:29<01:25, 3.43s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 89%|████████████████████████████████████████████████████████████████████████▏ | 197/221 [11:32<01:17, 3.24s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 90%|████████████████████████████████████████████████████████████████████████▌ | 198/221 [11:36<01:21, 3.54s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 90%|████████████████████████████████████████████████████████████████████████▉ | 199/221 [11:41<01:23, 3.81s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 90%|█████████████████████████████████████████████████████████████████████████▎ | 200/221 [11:44<01:18, 3.72s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 91%|█████████████████████████████████████████████████████████████████████████▋ | 201/221 [11:47<01:12, 3.61s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 91%|██████████████████████████████████████████████████████████████████████████ | 202/221 [11:50<01:04, 3.41s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 92%|██████████████████████████████████████████████████████████████████████████▍ | 203/221 [11:54<01:02, 3.48s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 92%|██████████████████████████████████████████████████████████████████████████▊ | 204/221 [11:58<01:03, 3.76s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 93%|███████████████████████████████████████████████████████████████████████████▏ | 205/221 [12:03<01:04, 4.06s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 93%|███████████████████████████████████████████████████████████████████████████▌ | 206/221 [12:08<01:03, 4.22s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 94%|███████████████████████████████████████████████████████████████████████████▊ | 207/221 [12:11<00:55, 3.99s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 94%|████████████████████████████████████████████████████████████████████████████▏ | 208/221 [12:15<00:51, 3.93s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 95%|██████████████████████████████���█████████████████████████████████████████████▌ | 209/221 [12:18<00:44, 3.74s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 95%|████████████████████████████████████████████████████████████████████████████▉ | 210/221 [12:22<00:42, 3.82s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 95%|█████████████████████████████████████████████████████████████████████████████▎ | 211/221 [12:27<00:39, 4.00s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 96%|█████████████████████████████████████████████████████████████████████████████▋ | 212/221 [12:31<00:35, 3.91s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 96%|██████████████████████████████████████████████████████████████████████████████ | 213/221 [12:33<00:29, 3.63s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 97%|██████████████████████████████████████████████████████████████████████████████▍ | 214/221 [12:37<00:25, 3.62s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 97%|██████████████████████████████████████████████████████████████████████████████▊ | 215/221 [12:41<00:22, 3.79s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 98%|███████████████████████████████████████████████████████████████████████████████▏ | 216/221 [12:45<00:19, 3.91s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 98%|███████████████████████████████████████████████████████████████████████████████▌ | 217/221 [12:49<00:15, 3.94s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 99%|███████████████████████████████████████████████████████████████████████████████▉ | 218/221 [12:53<00:11, 3.90s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. + 99%|████████████████████████████████████████████████████████████████████████████████▎| 219/221 [12:57<00:07, 3.88s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +100%|█████████████████████████████████████████████████████████████████████████████████| 221/221 [13:03<00:00, 3.37s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +100%|█████████████████████████████████████████████████████████████████████████████████| 221/221 [13:03<00:00, 3.37s/it][INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +03/03/2022 08:31:27 - INFO - datasets.metric - Removing /home/sanchit_huggingface_co/.cache/huggingface/metrics/wer/default/default_experiment-1-0.arrow +[INFO|configuration_utils.py:438] 2022-03-03 08:31:27,678 >> Configuration saved in ./checkpoint-500/config.json [INFO|trainer.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|feature_extraction_utils.py:324] 2022-03-03 08:31:43,691 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonner.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|feature_extraction_utils.py:324] 2022-03-03 08:31:43,691 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonner.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +[INFO|feature_extraction_utils.py:324] 2022-03-03 08:31:43,691 >> Configuration saved in ./checkpoint-500/preprocessor_config.jsonner.py:560] 2022-03-03 08:18:20,332 >> The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: input_length. If input_length are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. +03/03/2022 08:33:19 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20220303_074415-2c9ds5of/run-2c9ds5of.wandb']. This may take a bit of time if the files are large.