diff --git "a/wandb/run-20220302_154455-17zs7rwf/files/output.log" "b/wandb/run-20220302_154455-17zs7rwf/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220302_154455-17zs7rwf/files/output.log" @@ -0,0 +1,1904 @@ + + + 0%| | 0/1189 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8812, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:03,069 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%| | 1/1189 [00:05<1:56:33, 5.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:05,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0786, 'learning_rate': 0.0, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:08,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 2/1189 [00:11<1:49:12, 5.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:10,961 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0712, 'learning_rate': 6e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:13,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 3/1189 [00:16<1:48:11, 5.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:16,384 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8801, 'learning_rate': 1.2e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:18,957 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 4/1189 [00:21<1:46:26, 5.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:21,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6057, 'learning_rate': 1.8e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:24,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▎ | 5/1189 [00:26<1:44:19, 5.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:26,662 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9675, 'learning_rate': 2.4e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:29,203 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 6/1189 [00:32<1:43:15, 5.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:31,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7123, 'learning_rate': 2.9999999999999997e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:34,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▍ | 7/1189 [00:37<1:43:04, 5.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:37,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6731, 'learning_rate': 3.6e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:39,525 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 8/1189 [00:42<1:42:10, 5.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:42,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8586, 'learning_rate': 4.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:44,639 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▌ | 9/1189 [00:47<1:41:35, 5.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:47,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6587, 'learning_rate': 4.8e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:49,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 10/1189 [00:52<1:40:50, 5.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:52,218 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6914, 'learning_rate': 5.399999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:54,657 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▋ | 11/1189 [00:57<1:39:44, 5.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:45:57,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5375, 'learning_rate': 5.999999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:45:59,568 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▊ | 12/1189 [01:02<1:38:38, 5.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:02,056 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:04,500 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 13/1189 [01:07<1:37:59, 5.00s/it] + + 1%|▊ | 13/1189 [01:07<1:37:59, 5.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:06,992 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5341, 'learning_rate': 7.2e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:09,345 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 14/1189 [01:12<1:36:59, 4.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:11,916 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3981, 'learning_rate': 7.799999999999998e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:14,315 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|▉ | 15/1189 [01:17<1:37:00, 4.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:16,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:19,124 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 16/1189 [01:21<1:36:03, 4.91s/it] + + 1%|█ | 16/1189 [01:21<1:36:03, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:21,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5601, 'learning_rate': 8.999999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:23,972 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 1%|█▏ | 17/1189 [01:26<1:35:35, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:26,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6712, 'learning_rate': 9.6e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:28,736 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▏ | 18/1189 [01:31<1:34:44, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:31,172 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:33,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▎ | 19/1189 [01:36<1:33:52, 4.81s/it] + + 2%|█▎ | 19/1189 [01:36<1:33:52, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:35,897 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5952, 'learning_rate': 1.0799999999999998e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:38,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▎ | 20/1189 [01:41<1:33:20, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:40,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5382, 'learning_rate': 1.14e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:42,773 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▍ | 21/1189 [01:45<1:32:03, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:45,122 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4015, 'learning_rate': 1.1999999999999999e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:47,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▍ | 22/1189 [01:50<1:31:19, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:49,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:52,013 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5084, 'learning_rate': 1.26e-05, 'epoch': 0.02} + 2%|█▌ | 23/1189 [01:54<1:30:48, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:54,405 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:46:56,651 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▌ | 24/1189 [01:59<1:30:32, 4.66s/it] + + 2%|█▌ | 24/1189 [01:59<1:30:32, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:46:59,037 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.349, 'learning_rate': 1.3799999999999998e-05, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:01,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 2%|█▋ | 25/1189 [02:04<1:30:06, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:03,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:05,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 26/1189 [02:08<1:29:22, 4.61s/it] + + 2%|█▋ | 26/1189 [02:08<1:29:22, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:08,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:10,307 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 27/1189 [02:13<1:28:45, 4.58s/it] + + 2%|█▊ | 27/1189 [02:13<1:28:45, 4.58s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:12,585 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:14,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 28/1189 [02:17<1:28:02, 4.55s/it] + + 2%|█▊ | 28/1189 [02:17<1:28:02, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:16,992 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:19,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▉ | 29/1189 [02:22<1:27:06, 4.51s/it] + + 2%|█▉ | 29/1189 [02:22<1:27:06, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:21,406 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3368, 'learning_rate': 1.68e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:23,575 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|█▉ | 30/1189 [02:26<1:26:23, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:25,788 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4766, 'learning_rate': 1.74e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:27,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██ | 31/1189 [02:30<1:25:24, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:30,087 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2713, 'learning_rate': 1.7999999999999997e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:32,143 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 32/1189 [02:35<1:24:20, 4.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:34,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.395, 'learning_rate': 1.8599999999999998e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:36,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▏ | 33/1189 [02:39<1:23:11, 4.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:38,504 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0925, 'learning_rate': 1.92e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:40,487 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▎ | 34/1189 [02:43<1:22:11, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:42,560 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.288, 'learning_rate': 1.98e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:44,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▎ | 35/1189 [02:47<1:20:41, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:46,573 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5972, 'learning_rate': 2.04e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:48,488 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▍ | 36/1189 [02:51<1:19:21, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:50,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4117, 'learning_rate': 2.1e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:52,373 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▍ | 37/1189 [02:55<1:17:52, 4.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:54,326 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2725, 'learning_rate': 2.1599999999999996e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:56,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▌ | 38/1189 [02:59<1:16:14, 3.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:47:58,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7563, 'learning_rate': 2.2199999999999998e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:47:59,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██��� | 39/1189 [03:02<1:14:44, 3.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:01,727 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.422, 'learning_rate': 2.28e-05, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:03,463 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▋ | 40/1189 [03:06<1:12:50, 3.80s/it] + 3%|██▋ | 40/1189 [03:06<1:12:50, 3.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:05,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:06,918 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 3%|██▋ | 41/1189 [03:09<1:10:46, 3.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:08,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2827, 'learning_rate': 2.3999999999999997e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:10,141 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 42/1189 [03:13<1:07:59, 3.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:11,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:13,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 43/1189 [03:16<1:05:09, 3.41s/it] + + 4%|██▊ | 43/1189 [03:16<1:05:09, 3.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:14,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3088, 'learning_rate': 2.52e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:15,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▉ | 44/1189 [03:18<1:01:24, 3.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:17,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4834, 'learning_rate': 2.5799999999999997e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:18,434 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 45/1189 [03:21<56:59, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:19,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4258, 'learning_rate': 2.6399999999999995e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:20,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 46/1189 [03:23<52:13, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:21,655 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6226, 'learning_rate': 2.6999999999999996e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:22,557 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 47/1189 [03:25<47:42, 2.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:23,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8046, 'learning_rate': 2.7599999999999997e-05, 'epoch': 0.04} +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:24,299 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 48/1189 [03:27<43:18, 2.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:25,111 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:25,780 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▎ | 49/1189 [03:28<38:24, 2.02s/it] +{'loss': 4.9648, 'learning_rate': 2.7599999999999997e-05, 'epoch': 0.04} + 4%|███▎ | 49/1189 [03:28<38:24, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:26,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:27,534 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 50/1189 [03:30<36:51, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:30,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 50/1189 [03:30<36:51, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:30,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 51/1189 [03:35<57:50, 3.05s/it]g-point operations will not be computed-02 15:48:30,469 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 51/1189 [03:35<57:50, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 15:48:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 15:48:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 52/1189 [03:41<1:10:36, 3.73s/it]g-point operations will not be computed-02 15:48:35,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 52/1189 [03:41<1:10:36, 3.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:41,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▍ | 52/1189 [03:41<1:10:36, 3.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:41,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 53/1189 [03:46<1:18:54, 4.17s/it]g-point operations will not be computed-02 15:48:41,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▌ | 53/1189 [03:46<1:18:54, 4.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:46,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 54/1189 [03:51<1:23:49, 4.43s/it]g-point operations will not be computed-02 15:48:46,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 54/1189 [03:51<1:23:49, 4.43s/it]g-point operations will not be computed-02 15:48:46,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 54/1189 [03:51<1:23:49, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:51,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 54/1189 [03:51<1:23:49, 4.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:51,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 55/1189 [03:56<1:27:30, 4.63s/it]g-point operations will not be computed-02 15:48:51,286 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 55/1189 [03:56<1:27:30, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:48:56,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:58,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 15:48:56,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:48:58,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 15:48:56,390 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 56/1189 [04:01<1:29:50, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:01,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▋ | 56/1189 [04:01<1:29:50, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:01,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 57/1189 [04:06<1:31:18, 4.84s/it]g-point operations will not be computed-02 15:49:01,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 57/1189 [04:06<1:31:18, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:06,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 58/1189 [04:11<1:32:20, 4.90s/it]g-point operations will not be computed-02 15:49:06,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 58/1189 [04:11<1:32:20, 4.90s/it]g-point operations will not be computed-02 15:49:06,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▊ | 58/1189 [04:11<1:32:20, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:11,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 59/1189 [04:16<1:32:47, 4.93s/it]g-point operations will not be computed-02 15:49:11,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 59/1189 [04:16<1:32:47, 4.93s/it]g-point operations will not be computed-02 15:49:11,414 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 59/1189 [04:16<1:32:47, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:16,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 60/1189 [04:21<1:32:49, 4.93s/it]g-point operations will not be computed-02 15:49:16,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 60/1189 [04:21<1:32:49, 4.93s/it]g-point operations will not be computed-02 15:49:16,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▉ | 60/1189 [04:21<1:32:49, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:21,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 61/1189 [04:26<1:32:34, 4.92s/it]g-point operations will not be computed-02 15:49:21,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 61/1189 [04:26<1:32:34, 4.92s/it]g-point operations will not be computed-02 15:49:21,325 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 61/1189 [04:26<1:32:34, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:26,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 61/1189 [04:26<1:32:34, 4.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:26,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 62/1189 [04:31<1:31:57, 4.90s/it]g-point operations will not be computed-02 15:49:26,201 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 62/1189 [04:31<1:31:57, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:31,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████ | 62/1189 [04:31<1:31:57, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:31,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 63/1189 [04:36<1:31:20, 4.87s/it]g-point operations will not be computed-02 15:49:31,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 63/1189 [04:36<1:31:20, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:35,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▏ | 63/1189 [04:36<1:31:20, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:35,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 64/1189 [04:41<1:31:14, 4.87s/it]g-point operations will not be computed-02 15:49:35,837 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 64/1189 [04:41<1:31:14, 4.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:40,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:49:42,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 15:49:40,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:49:42,932 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 15:49:40,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 65/1189 [04:45<1:30:18, 4.82s/it]g-point operations will not be computed-02 15:49:40,627 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 65/1189 [04:45<1:30:18, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:45,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|████▎ | 65/1189 [04:45<1:30:18, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:45,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 66/1189 [04:50<1:29:51, 4.80s/it]g-point operations will not be computed-02 15:49:45,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 66/1189 [04:50<1:29:51, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:50,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 66/1189 [04:50<1:29:51, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:50,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 67/1189 [04:55<1:29:37, 4.79s/it]g-point operations will not be computed-02 15:49:50,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 67/1189 [04:55<1:29:37, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:54,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▍ | 67/1189 [04:55<1:29:37, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:54,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 68/1189 [04:59<1:28:40, 4.75s/it]g-point operations will not be computed-02 15:49:54,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 68/1189 [04:59<1:28:40, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:49:59,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 69/1189 [05:04<1:28:05, 4.72s/it]g-point operations will not be computed-02 15:49:59,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 69/1189 [05:04<1:28:05, 4.72s/it]g-point operations will not be computed-02 15:49:59,497 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 69/1189 [05:04<1:28:05, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:04,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▌ | 69/1189 [05:04<1:28:05, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:04,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 70/1189 [05:09<1:27:16, 4.68s/it]g-point operations will not be computed-02 15:50:04,077 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 70/1189 [05:09<1:27:16, 4.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:08,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 70/1189 [05:09<1:27:16, 4.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:08,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 71/1189 [05:13<1:26:44, 4.66s/it]g-point operations will not be computed-02 15:50:08,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▋ | 71/1189 [05:13<1:26:44, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:13,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 72/1189 [05:18<1:26:29, 4.65s/it]g-point operations will not be computed-02 15:50:13,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 72/1189 [05:18<1:26:29, 4.65s/it]g-point operations will not be computed-02 15:50:13,303 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 72/1189 [05:18<1:26:29, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:17,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 15:50:17,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 15:50:17,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 73/1189 [05:23<1:26:11, 4.63s/it]g-point operations will not be computed-02 15:50:17,921 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 73/1189 [05:23<1:26:11, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:22,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▊ | 73/1189 [05:23<1:26:11, 4.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:22,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 74/1189 [05:27<1:25:55, 4.62s/it]g-point operations will not be computed-02 15:50:22,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 74/1189 [05:27<1:25:55, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:27,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 74/1189 [05:27<1:25:55, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:27,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 75/1189 [05:32<1:25:22, 4.60s/it]g-point operations will not be computed-02 15:50:27,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 75/1189 [05:32<1:25:22, 4.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:31,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|████▉ | 75/1189 [05:32<1:25:22, 4.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:31,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 76/1189 [05:36<1:24:18, 4.54s/it]g-point operations will not be computed-02 15:50:31,571 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 76/1189 [05:36<1:24:18, 4.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:36,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 76/1189 [05:36<1:24:18, 4.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:36,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 77/1189 [05:41<1:23:38, 4.51s/it]g-point operations will not be computed-02 15:50:36,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 6%|█████ | 77/1189 [05:41<1:23:38, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:40,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 15:50:40,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + g-point operations will not be computed-02 15:50:40,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 78/1189 [05:45<1:22:58, 4.48s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:44,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 79/1189 [05:49<1:22:06, 4.44s/it]g-point operations will not be computed-02 15:50:44,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▏ | 79/1189 [05:49<1:22:06, 4.44s/it]g-point operations will not be computed-02 15:50:44,805 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:50:51,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 15:50:49,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:50:51,252 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 15:50:49,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▎ | 80/1189 [05:54<1:21:28, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:53,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 81/1189 [05:58<1:20:38, 4.37s/it]g-point operations will not be computed-02 15:50:53,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 81/1189 [05:58<1:20:38, 4.37s/it]g-point operations will not be computed-02 15:50:53,485 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 81/1189 [05:58<1:20:38, 4.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:50:57,654 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 82/1189 [06:02<1:19:33, 4.31s/it]g-point operations will not be computed-02 15:50:57,654 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 82/1189 [06:02<1:19:33, 4.31s/it]g-point operations will not be computed-02 15:50:57,654 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▍ | 82/1189 [06:02<1:19:33, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:01,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 83/1189 [06:06<1:18:40, 4.27s/it]g-point operations will not be computed-02 15:51:01,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 83/1189 [06:06<1:18:40, 4.27s/it]g-point operations will not be computed-02 15:51:01,860 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 83/1189 [06:06<1:18:40, 4.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:05,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 84/1189 [06:10<1:17:10, 4.19s/it]g-point operations will not be computed-02 15:51:05,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 84/1189 [06:10<1:17:10, 4.19s/it]g-point operations will not be computed-02 15:51:05,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▌ | 84/1189 [06:10<1:17:10, 4.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:09,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 85/1189 [06:14<1:15:30, 4.10s/it]g-point operations will not be computed-02 15:51:09,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 85/1189 [06:14<1:15:30, 4.10s/it]g-point operations will not be computed-02 15:51:09,879 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 85/1189 [06:14<1:15:30, 4.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:13,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 86/1189 [06:18<1:14:35, 4.06s/it]g-point operations will not be computed-02 15:51:13,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 86/1189 [06:18<1:14:35, 4.06s/it]g-point operations will not be computed-02 15:51:13,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▋ | 86/1189 [06:18<1:14:35, 4.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:17,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 87/1189 [06:22<1:12:53, 3.97s/it]g-point operations will not be computed-02 15:51:17,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 87/1189 [06:22<1:12:53, 3.97s/it]g-point operations will not be computed-02 15:51:17,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 87/1189 [06:22<1:12:53, 3.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:21,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 88/1189 [06:26<1:11:03, 3.87s/it]g-point operations will not be computed-02 15:51:21,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 88/1189 [06:26<1:11:03, 3.87s/it]g-point operations will not be computed-02 15:51:21,387 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 88/1189 [06:26<1:11:03, 3.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:24,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▊ | 88/1189 [06:26<1:11:03, 3.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:24,982 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 7%|█████▉ | 89/1189 [06:29<1:09:16, 3.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:28,481 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 90/1189 [06:32<1:07:03, 3.66s/it]g-point operations will not be computed-02 15:51:28,481 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 90/1189 [06:32<1:07:03, 3.66s/it]g-point operations will not be computed-02 15:51:28,481 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|█████▉ | 90/1189 [06:32<1:07:03, 3.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:31,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 91/1189 [06:36<1:04:46, 3.54s/it]g-point operations will not be computed-02 15:51:31,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 91/1189 [06:36<1:04:46, 3.54s/it]g-point operations will not be computed-02 15:51:31,827 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:39<1:02:07, 3.40s/it]g-point operations will not be computed-02 15:51:34,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:39<1:02:07, 3.40s/it]g-point operations will not be computed-02 15:51:34,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:39<1:02:07, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:37,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████ | 92/1189 [06:39<1:02:07, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:37,914 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 93/1189 [06:42<58:55, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:40,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▎ | 93/1189 [06:42<58:55, 3.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:40,586 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:41,770 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 94/1189 [06:44<55:05, 3.02s/it] + 8%|██████▍ | 95/1189 [06:47<51:38, 2.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:43,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:44,169 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▍ | 95/1189 [06:47<51:38, 2.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:43,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 96/1189 [06:49<47:31, 2.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:45,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:46,257 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 96/1189 [06:49<47:31, 2.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:45,297 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 97/1189 [06:51<43:33, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:49,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:48,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▌ | 97/1189 [06:51<43:33, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:49,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4441, 'learning_rate': 5.5799999999999994e-05, 'epoch': 0.08} +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:49,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 98/1189 [06:52<39:37, 2.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:50,668 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5362, 'learning_rate': 5.6399999999999995e-05, 'epoch': 0.08} +{'loss': 4.534, 'learning_rate': 5.6999999999999996e-05, 'epoch': 0.08} +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:51,329 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▋ | 99/1189 [06:54<35:55, 1.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:52,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:53,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2088, 'learning_rate': 5.76e-05, 'epoch': 0.08} + + 8%|██████▋ | 100/1189 [06:56<34:58, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:51:56,070 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:51:58,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 8%|██████▊ | 101/1189 [07:01<54:18, 2.99s/it] + + 8%|██████▊ | 101/1189 [07:01<54:18, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:01,306 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:03,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4628, 'learning_rate': 5.88e-05, 'epoch': 0.09} + + 9%|██████▋ | 102/1189 [07:06<1:06:29, 3.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:06,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.245, 'learning_rate': 5.94e-05, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:09,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 9%|██████▊ | 103/1189 [07:11<1:14:25, 4.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:11,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:14,139 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▊ | 104/1189 [07:17<1:19:50, 4.41s/it] + + 9%|██████▊ | 104/1189 [07:17<1:19:50, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:16,749 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2792, 'learning_rate': 6.0599999999999996e-05, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:19,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 9%|██████▉ | 105/1189 [07:22<1:23:20, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:21,786 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:24,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|██████▉ | 106/1189 [07:27<1:25:32, 4.74s/it] + + 9%|██████▉ | 106/1189 [07:27<1:25:32, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:26,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4365, 'learning_rate': 6.18e-05, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:29,312 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 9%|███████ | 107/1189 [07:32<1:27:13, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:31,878 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:34,355 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████ | 108/1189 [07:37<1:28:15, 4.90s/it] + + 9%|███████ | 108/1189 [07:37<1:28:15, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:36,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3644, 'learning_rate': 6.299999999999999e-05, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:39,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 9%|███████▏ | 109/1189 [07:42<1:28:46, 4.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:41,901 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:44,330 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▏ | 110/1189 [07:47<1:28:52, 4.94s/it] + + 9%|███████▏ | 110/1189 [07:47<1:28:52, 4.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:46,814 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:49,183 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 9%|███████▎ | 111/1189 [07:52<1:28:18, 4.91s/it] + + 9%|███████▎ | 111/1189 [07:52<1:28:18, 4.91s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:51,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4694, 'learning_rate': 6.479999999999999e-05, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:53,991 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 9%|███████▎ | 112/1189 [07:56<1:27:39, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:52:56,411 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2336, 'learning_rate': 6.539999999999999e-05, 'epoch': 0.09} +[WARNING|modeling_utils.py:388] 2022-03-02 15:52:58,750 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 10%|███████▍ | 113/1189 [08:01<1:26:53, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:01,256 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:03,607 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4048, 'learning_rate': 6.599999999999999e-05, 'epoch': 0.1} + + 10%|███████▍ | 114/1189 [08:06<1:26:53, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:06,121 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3602, 'learning_rate': 6.659999999999999e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:08,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 10%|███████▌ | 115/1189 [08:11<1:26:54, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:10,893 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:13,268 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▌ | 116/1189 [08:16<1:26:28, 4.84s/it] + + 10%|███████▌ | 116/1189 [08:16<1:26:28, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:15,608 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:17,941 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2123, 'learning_rate': 6.78e-05, 'epoch': 0.1} + 10%|███████▋ | 117/1189 [08:20<1:25:31, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:20,331 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:22,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0794, 'learning_rate': 6.84e-05, 'epoch': 0.1} + + 10%|███████▋ | 118/1189 [08:25<1:24:40, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:24,986 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:27,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 119/1189 [08:30<1:24:18, 4.73s/it] + + 10%|███████▊ | 119/1189 [08:30<1:24:18, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:29,632 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:31,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▊ | 120/1189 [08:34<1:23:49, 4.70s/it] + + 10%|███████▊ | 120/1189 [08:34<1:23:49, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:34,329 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:36,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|███████▉ | 121/1189 [08:39<1:23:30, 4.69s/it] + + 10%|███████▉ | 121/1189 [08:39<1:23:30, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:38,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3001, 'learning_rate': 7.079999999999999e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:41,205 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 10%|████████ | 122/1189 [08:44<1:23:02, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:43,527 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:45,789 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████ | 123/1189 [08:48<1:23:00, 4.67s/it] + + 10%|████████ | 123/1189 [08:48<1:23:00, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:48,196 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:50,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 10%|████████▏ | 124/1189 [08:53<1:21:58, 4.62s/it] + + 10%|████████▏ | 124/1189 [08:53<1:21:58, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:52,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:54,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▏ | 125/1189 [08:57<1:21:00, 4.57s/it] + + 11%|████████▏ | 125/1189 [08:57<1:21:00, 4.57s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:53:57,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:53:59,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 126/1189 [09:02<1:20:15, 4.53s/it] + + 11%|████████▎ | 126/1189 [09:02<1:20:15, 4.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:01,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:03,728 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▎ | 127/1189 [09:06<1:19:49, 4.51s/it] + + 11%|████████▎ | 127/1189 [09:06<1:19:49, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:05,938 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1436, 'learning_rate': 7.439999999999999e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:08,112 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 11%|████████▍ | 128/1189 [09:10<1:19:05, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:10,349 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:12,476 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▍ | 129/1189 [09:15<1:18:26, 4.44s/it] + + 11%|████████▍ | 129/1189 [09:15<1:18:26, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:14,725 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:16,793 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 130/1189 [09:19<1:17:42, 4.40s/it] + + 11%|████████▌ | 130/1189 [09:19<1:17:42, 4.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:18,988 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:21,051 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▌ | 131/1189 [09:23<1:16:51, 4.36s/it] + + 11%|████████▌ | 131/1189 [09:23<1:16:51, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:23,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:25,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 132/1189 [09:28<1:15:33, 4.29s/it] + + 11%|████████▋ | 132/1189 [09:28<1:15:33, 4.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:27,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:29,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▋ | 133/1189 [09:32<1:14:33, 4.24s/it] + + 11%|████████▋ | 133/1189 [09:32<1:14:33, 4.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:31,327 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:33,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 134/1189 [09:36<1:13:10, 4.16s/it] + + 11%|████████▊ | 134/1189 [09:36<1:13:10, 4.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:35,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:37,212 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▊ | 135/1189 [09:40<1:11:54, 4.09s/it] + + 11%|████████▊ | 135/1189 [09:40<1:11:54, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:39,186 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:41,057 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 11%|████████▉ | 136/1189 [09:43<1:10:32, 4.02s/it] + + 11%|████████▉ | 136/1189 [09:43<1:10:32, 4.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:43,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:44,874 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|████████▉ | 137/1189 [09:47<1:09:24, 3.96s/it] + + 12%|████████▉ | 137/1189 [09:47<1:09:24, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:46,800 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4032, 'learning_rate': 8.04e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:48,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 138/1189 [09:51<1:08:11, 3.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:50,480 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:52,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████ | 139/1189 [09:55<1:06:37, 3.81s/it] + + 12%|█████████ | 139/1189 [09:55<1:06:37, 3.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:54,023 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:55,681 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 140/1189 [09:58<1:04:44, 3.70s/it] + + 12%|█████████▏ | 140/1189 [09:58<1:04:44, 3.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:54:57,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4511, 'learning_rate': 8.22e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 15:54:58,848 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▏ | 141/1189 [10:01<1:01:53, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:00,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:01,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 142/1189 [10:04<59:04, 3.39s/it] + + 12%|█████████▌ | 142/1189 [10:04<59:04, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:03,348 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.1865, 'learning_rate': 8.34e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:04,669 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▌ | 143/1189 [10:07<55:58, 3.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:06,044 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5402, 'learning_rate': 8.4e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:07,267 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▋ | 144/1189 [10:10<52:42, 3.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:08,517 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:09,615 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 145/1189 [10:12<49:06, 2.82s/it] + 12%|█████████▊ | 145/1189 [10:12<49:06, 2.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:10,752 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:11,746 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▊ | 146/1189 [10:14<45:27, 2.62s/it] + 12%|█████████▊ | 146/1189 [10:14<45:27, 2.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:12,817 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:13,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 147/1189 [10:16<41:55, 2.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:14,614 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4617, 'learning_rate': 8.579999999999998e-05, 'epoch': 0.12} +{'loss': 4.2089, 'learning_rate': 8.639999999999999e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:15,381 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 12%|█████████▉ | 148/1189 [10:18<38:07, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:16,185 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4946, 'learning_rate': 8.699999999999999e-05, 'epoch': 0.13} +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:16,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 149/1189 [10:19<34:11, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:17,533 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4766, 'learning_rate': 8.759999999999999e-05, 'epoch': 0.13} +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:18,634 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 13%|██████████ | 150/1189 [10:21<33:17, 1.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:21,550 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:24,119 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 151/1189 [10:26<51:44, 2.99s/it] + + 13%|██████████▏ | 151/1189 [10:26<51:44, 2.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:26,742 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:29,263 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|█████████▉ | 152/1189 [10:32<1:02:51, 3.64s/it] + + 13%|█████████▉ | 152/1189 [10:32<1:02:51, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:31,933 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:34,422 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████ | 153/1189 [10:37<1:10:41, 4.09s/it] + + 13%|██████████ | 153/1189 [10:37<1:10:41, 4.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:37,060 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:39,569 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4248, 'learning_rate': 8.999999999999999e-05, 'epoch': 0.13} + + 13%|██████████ | 154/1189 [10:42<1:16:04, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:42,208 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:44,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 155/1189 [10:47<1:19:31, 4.61s/it] + + 13%|██████████▏ | 155/1189 [10:47<1:19:31, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:47,232 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:49,703 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▏ | 156/1189 [10:52<1:21:39, 4.74s/it] + + 13%|██████████▏ | 156/1189 [10:52<1:21:39, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:52,269 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:54,634 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 157/1189 [10:57<1:22:32, 4.80s/it] + + 13%|██████████▎ | 157/1189 [10:57<1:22:32, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:55:57,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:55:59,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▎ | 158/1189 [11:02<1:23:16, 4.85s/it] + + 13%|██████████▎ | 158/1189 [11:02<1:23:16, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:02,073 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:04,486 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 159/1189 [11:07<1:23:27, 4.86s/it] + + 13%|██████████▍ | 159/1189 [11:07<1:23:27, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:07,009 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:09,402 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 13%|██████████▍ | 160/1189 [11:12<1:23:38, 4.88s/it] + + 13%|██████████▍ | 160/1189 [11:12<1:23:38, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:11,886 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:14,287 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▌ | 161/1189 [11:17<1:23:35, 4.88s/it] + + 14%|██████████▌ | 161/1189 [11:17<1:23:35, 4.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:16,743 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:19,089 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|██████████▋ | 162/1189 [11:21<1:23:07, 4.86s/it] + + 14%|██████████▋ | 162/1189 [11:21<1:23:07, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:21,495 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3452, 'learning_rate': 9.539999999999999e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:23,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|██████████▋ | 163/1189 [11:26<1:22:36, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:26,275 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.0952, 'learning_rate': 9.599999999999999e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:28,598 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|██████████▊ | 164/1189 [11:31<1:22:03, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:31,024 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3206, 'learning_rate': 9.659999999999999e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:33,333 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|██████████▊ | 165/1189 [11:36<1:21:37, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:35,735 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 3.9475, 'learning_rate': 9.719999999999999e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:38,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|██████████▉ | 166/1189 [11:40<1:21:14, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:40,454 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3213, 'learning_rate': 9.779999999999999e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:42,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|██████████▉ | 167/1189 [11:45<1:20:58, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:45,181 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2528, 'learning_rate': 9.839999999999999e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:47,474 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|███████████ | 168/1189 [11:50<1:20:34, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:49,830 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3995, 'learning_rate': 9.9e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:52,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|███████████ | 169/1189 [11:54<1:19:57, 4.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:54,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2793, 'learning_rate': 9.96e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:56:56,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|███████████▏ | 170/1189 [11:59<1:19:08, 4.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:56:58,994 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3207, 'learning_rate': 0.0001002, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:01,244 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 14%|███████████▏ | 171/1189 [12:04<1:18:40, 4.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:03,561 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:05,783 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 14%|███████████▎ | 172/1189 [12:08<1:18:04, 4.61s/it] + + 14%|███████████▎ | 172/1189 [12:08<1:18:04, 4.61s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:08,117 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:10,332 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▎ | 173/1189 [12:13<1:17:42, 4.59s/it] + + 15%|███████████▎ | 173/1189 [12:13<1:17:42, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:12,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5079, 'learning_rate': 0.000102, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:14,822 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 15%|███████████▍ | 174/1189 [12:17<1:17:07, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:17,022 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3925, 'learning_rate': 0.0001026, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:19,194 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 15%|███████████▍ | 175/1189 [12:22<1:16:06, 4.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:21,398 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3453, 'learning_rate': 0.00010319999999999999, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:23,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 15%|███████████▌ | 176/1189 [12:26<1:15:03, 4.45s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:25,758 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4135, 'learning_rate': 0.00010379999999999999, 'epoch': 0.15} +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:27,851 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 15%|███████████▌ | 177/1189 [12:30<1:14:28, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:30,031 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:32,138 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 178/1189 [12:35<1:13:44, 4.38s/it] + + 15%|███████████▋ | 178/1189 [12:35<1:13:44, 4.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:34,336 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:36,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▋ | 179/1189 [12:39<1:13:05, 4.34s/it] + + 15%|███████████▋ | 179/1189 [12:39<1:13:05, 4.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:38,605 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:40,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 180/1189 [12:43<1:12:31, 4.31s/it] + + 15%|███████████▊ | 180/1189 [12:43<1:12:31, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:42,836 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:44,898 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▊ | 181/1189 [12:47<1:12:09, 4.30s/it] + + 15%|███████████▊ | 181/1189 [12:47<1:12:09, 4.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:47,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:49,003 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|███████████▉ | 182/1189 [12:51<1:11:07, 4.24s/it] + + 15%|███████████▉ | 182/1189 [12:51<1:11:07, 4.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:51,084 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:53,098 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 183/1189 [12:55<1:10:21, 4.20s/it] + + 15%|████████████ | 183/1189 [12:55<1:10:21, 4.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:55,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:57:57,055 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 15%|████████████ | 184/1189 [12:59<1:09:04, 4.12s/it] + + 15%|████████████ | 184/1189 [12:59<1:09:04, 4.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:57:59,105 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:01,016 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 185/1189 [13:03<1:08:11, 4.07s/it] + + 16%|████████████▏ | 185/1189 [13:03<1:08:11, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:03,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:04,876 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▏ | 186/1189 [13:07<1:07:02, 4.01s/it] + + 16%|████████████▏ | 186/1189 [13:07<1:07:02, 4.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:06,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:08,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 187/1189 [13:11<1:05:53, 3.95s/it] + + 16%|████████████▎ | 187/1189 [13:11<1:05:53, 3.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:10,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:12,343 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▎ | 188/1189 [13:15<1:04:28, 3.86s/it] + 16%|████████████▎ | 188/1189 [13:15<1:04:28, 3.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:14,190 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:15,881 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5038, 'learning_rate': 0.00011099999999999999, 'epoch': 0.16} + 16%|████████████▍ | 189/1189 [13:18<1:02:45, 3.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:17,597 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:19,168 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▍ | 190/1189 [13:22<1:00:19, 3.62s/it] + + 16%|████████████▍ | 190/1189 [13:22<1:00:19, 3.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:20,782 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4092, 'learning_rate': 0.00011219999999999999, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:22,238 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▊ | 191/1189 [13:25<57:29, 3.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:23,762 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:25,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|████████████▉ | 192/1189 [13:28<54:39, 3.29s/it] + 16%|████████████▉ | 192/1189 [13:28<54:39, 3.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:26,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:27,831 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0273, 'learning_rate': 0.00011339999999999999, 'epoch': 0.16} + 16%|████████████▉ | 193/1189 [13:30<51:38, 3.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:29,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3623, 'learning_rate': 0.00011399999999999999, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:30,308 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████ | 194/1189 [13:33<48:26, 2.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:31,521 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6133, 'learning_rate': 0.0001146, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:32,631 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████ | 195/1189 [13:35<45:24, 2.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:33,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9681, 'learning_rate': 0.0001152, 'epoch': 0.16} +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:34,777 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 16%|█████████████▏ | 196/1189 [13:37<42:24, 2.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:35,823 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5346, 'learning_rate': 0.0001158, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:36,697 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 197/1189 [13:39<39:10, 2.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:37,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6223, 'learning_rate': 0.0001164, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:38,438 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▎ | 198/1189 [13:41<36:02, 2.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:39,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:40,038 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6635, 'learning_rate': 0.000117, 'epoch': 0.17} + 17%|█████████████▍ | 199/1189 [13:42<33:06, 2.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:40,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:41,854 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 200/1189 [13:44<32:08, 1.95s/it] + + 17%|█████████████▍ | 200/1189 [13:44<32:08, 1.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:44,667 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:47,305 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 201/1189 [13:50<49:24, 3.00s/it] + + 17%|█████████████▌ | 201/1189 [13:50<49:24, 3.00s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:49,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:52,522 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3973, 'learning_rate': 0.0001188, 'epoch': 0.17} + 17%|█████████████▎ | 202/1189 [13:55<1:00:17, 3.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:58:55,079 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5411, 'learning_rate': 0.0001194, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 15:58:57,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 17%|█████████████▎ | 203/1189 [14:00<1:06:54, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:00,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:02,529 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 204/1189 [14:05<1:11:21, 4.35s/it] + + 17%|█████████████▍ | 204/1189 [14:05<1:11:21, 4.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:05,136 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:07,543 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▍ | 205/1189 [14:10<1:14:33, 4.55s/it] + + 17%|█████████████▍ | 205/1189 [14:10<1:14:33, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:10,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:12,556 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 17%|█████████████▌ | 206/1189 [14:15<1:16:47, 4.69s/it] + + 17%|█████████████▌ | 206/1189 [14:15<1:16:47, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:15,127 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:17,591 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4064, 'learning_rate': 0.00012179999999999999, 'epoch': 0.17} + + 17%|█████████████▌ | 207/1189 [14:20<1:18:24, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:20,100 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0471, 'learning_rate': 0.0001224, 'epoch': 0.17} +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:22,508 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 17%|█████████████▋ | 208/1189 [14:25<1:18:56, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:25,020 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4697, 'learning_rate': 0.00012299999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:27,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|█████████████▋ | 209/1189 [14:30<1:19:50, 4.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:30,041 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7667, 'learning_rate': 0.0001236, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:32,457 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|█████████████▊ | 210/1189 [14:35<1:19:55, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:34,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.41, 'learning_rate': 0.00012419999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:37,360 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|█████████████▊ | 211/1189 [14:40<1:19:51, 4.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:39,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4879, 'learning_rate': 0.00012479999999999997, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:42,131 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|█████████████▉ | 212/1189 [14:45<1:19:09, 4.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:44,609 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2788, 'learning_rate': 0.00012539999999999999, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:46,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|█████████████▉ | 213/1189 [14:49<1:18:50, 4.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:49,356 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.543, 'learning_rate': 0.00012599999999999997, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:51,709 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|██████████████ | 214/1189 [14:54<1:18:21, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:54,176 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 15:59:56,511 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████ | 215/1189 [14:59<1:18:11, 4.82s/it] + + 18%|██████████████ | 215/1189 [14:59<1:18:11, 4.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 15:59:58,894 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5093, 'learning_rate': 0.00012719999999999997, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:01,214 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|██████████████▏ | 216/1189 [15:04<1:17:33, 4.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:03,548 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.577, 'learning_rate': 0.0001278, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:05,864 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|██████████████▏ | 217/1189 [15:08<1:16:49, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:08,264 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:10,583 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 18%|██████████████▎ | 218/1189 [15:13<1:16:38, 4.74s/it] + + 18%|██████████████▎ | 218/1189 [15:13<1:16:38, 4.74s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:13,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.123, 'learning_rate': 0.000129, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:15,274 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 18%|██████████████▎ | 219/1189 [15:18<1:16:20, 4.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:17,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8962, 'learning_rate': 0.00012959999999999998, 'epoch': 0.18} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:19,829 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 19%|██████████████▍ | 220/1189 [15:22<1:15:27, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:22,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4084, 'learning_rate': 0.0001302, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:24,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 19%|██████████████▍ | 221/1189 [15:27<1:14:31, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:26,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:28,846 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▌ | 222/1189 [15:31<1:13:58, 4.59s/it] + + 19%|██████████████▌ | 222/1189 [15:31<1:13:58, 4.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:31,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:33,287 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▋ | 223/1189 [15:36<1:13:10, 4.55s/it] + + 19%|██████████████▋ | 223/1189 [15:36<1:13:10, 4.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:35,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:37,760 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5116, 'learning_rate': 0.00013199999999999998, 'epoch': 0.19} + + 19%|██████████████▋ | 224/1189 [15:40<1:12:45, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:39,980 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4669, 'learning_rate': 0.0001326, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:42,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 19%|██████████████▊ | 225/1189 [15:45<1:12:05, 4.49s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:44,413 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.296, 'learning_rate': 0.00013319999999999999, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:46,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 19%|██████████████▊ | 226/1189 [15:49<1:11:42, 4.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:48,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:50,947 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|██████████████▉ | 227/1189 [15:53<1:11:07, 4.44s/it] + + 19%|██████████████▉ | 227/1189 [15:53<1:11:07, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:53,249 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6158, 'learning_rate': 0.0001344, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:55,339 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 19%|██████████████▉ | 228/1189 [15:58<1:10:49, 4.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:00:57,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5602, 'learning_rate': 0.000135, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 16:00:59,547 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 19%|███████████████ | 229/1189 [16:02<1:09:44, 4.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:01,699 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:03,800 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 19%|███████████████ | 230/1189 [16:06<1:09:10, 4.33s/it] + + 19%|███████████████ | 230/1189 [16:06<1:09:10, 4.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:05,968 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5002, 'learning_rate': 0.0001362, 'epoch': 0.19} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:07,977 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 19%|███████████████▏ | 231/1189 [16:10<1:08:21, 4.28s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:10,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:12,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▏ | 232/1189 [16:14<1:07:27, 4.23s/it] + + 20%|███████████████▏ | 232/1189 [16:14<1:07:27, 4.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:14,229 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7365, 'learning_rate': 0.0001374, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:16,251 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|███████████████▎ | 233/1189 [16:19<1:07:05, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:18,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3405, 'learning_rate': 0.000138, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:20,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|███████████████▎ | 234/1189 [16:23<1:05:52, 4.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:22,266 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9632, 'learning_rate': 0.0001386, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:24,116 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|███████████████▍ | 235/1189 [16:26<1:04:38, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:26,108 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6011, 'learning_rate': 0.0001392, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:27,935 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|███████████████▍ | 236/1189 [16:30<1:03:23, 3.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:29,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.347, 'learning_rate': 0.00013979999999999998, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:31,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|███████████████▌ | 237/1189 [16:34<1:02:33, 3.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:33,688 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.531, 'learning_rate': 0.0001404, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:35,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|███████████████▌ | 238/1189 [16:38<1:01:18, 3.87s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:37,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4014, 'learning_rate': 0.00014099999999999998, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:39,097 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|███████████████▋ | 239/1189 [16:41<1:00:09, 3.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:40,915 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5991, 'learning_rate': 0.00014159999999999997, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:42,624 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|████████████████▏ | 240/1189 [16:45<58:48, 3.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:44,436 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9526, 'learning_rate': 0.0001422, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:46,107 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|████████████████▏ | 241/1189 [16:48<57:37, 3.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:47,820 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2825, 'learning_rate': 0.00014279999999999997, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:49,322 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 20%|████████████████▎ | 242/1189 [16:52<55:31, 3.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:50,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5183, 'learning_rate': 0.0001434, 'epoch': 0.2} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:52,429 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 20%|████████████████▎ | 243/1189 [16:55<53:31, 3.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:53,954 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4014, 'learning_rate': 0.00014399999999999998, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:55,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 244/1189 [16:58<51:09, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:56,724 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0268, 'learning_rate': 0.0001446, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 16:01:57,900 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▍ | 245/1189 [17:00<47:52, 3.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:01:59,173 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2919, 'learning_rate': 0.00014519999999999998, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:00,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 246/1189 [17:03<44:33, 2.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:01,364 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4588, 'learning_rate': 0.0001458, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:02,317 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 247/1189 [17:05<40:53, 2.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:03,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6392, 'learning_rate': 0.00014639999999999998, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:04,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 248/1189 [17:07<37:08, 2.37s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:04,976 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7751, 'learning_rate': 0.000147, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:05,653 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▊ | 249/1189 [17:08<33:06, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:06,353 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:07,451 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6246, 'learning_rate': 0.00014759999999999998, 'epoch': 0.21} + + 21%|████████████████▊ | 250/1189 [17:10<31:35, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:10,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1391, 'learning_rate': 0.0001482, 'epoch': 0.21} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:12,774 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 21%|████████████████▉ | 251/1189 [17:15<47:04, 3.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:15,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:17,981 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▉ | 252/1189 [17:20<57:18, 3.67s/it] + + 21%|████████████████▉ | 252/1189 [17:20<57:18, 3.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:20,584 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:22,993 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▌ | 253/1189 [17:25<1:03:31, 4.07s/it] + + 21%|████████████████▌ | 253/1189 [17:25<1:03:31, 4.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:25,564 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:27,987 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 254/1189 [17:30<1:07:46, 4.35s/it] + + 21%|████████████████▋ | 254/1189 [17:30<1:07:46, 4.35s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:30,535 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:32,908 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 21%|████████████████▋ | 255/1189 [17:35<1:10:21, 4.52s/it] + + 21%|████████████████▋ | 255/1189 [17:35<1:10:21, 4.52s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:35,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:37,937 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|████████████████▊ | 256/1189 [17:40<1:12:39, 4.67s/it] + + 22%|████████████████▊ | 256/1189 [17:40<1:12:39, 4.67s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:40,426 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2905, 'learning_rate': 0.00015179999999999998, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:42,792 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|████████████████▊ | 257/1189 [17:45<1:13:26, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:45,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8819, 'learning_rate': 0.0001524, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:47,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|████████████████▉ | 258/1189 [17:50<1:13:54, 4.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:50,157 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5516, 'learning_rate': 0.00015299999999999998, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:52,509 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|████████████████▉ | 259/1189 [17:55<1:14:19, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:55,052 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3883, 'learning_rate': 0.0001536, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:02:57,459 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|█████████████████ | 260/1189 [18:00<1:14:57, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:02:59,900 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5, 'learning_rate': 0.00015419999999999998, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:02,272 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|█████████████████ | 261/1189 [18:05<1:14:45, 4.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:04,694 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4915, 'learning_rate': 0.0001548, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:07,021 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|█████████████████▏ | 262/1189 [18:09<1:14:16, 4.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:09,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5544, 'learning_rate': 0.00015539999999999998, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:11,803 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|█████████████████▎ | 263/1189 [18:14<1:14:04, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:14,247 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8566, 'learning_rate': 0.000156, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:16,582 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|█████████████████▎ | 264/1189 [18:19<1:13:54, 4.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:18,939 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:21,245 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 22%|█████████████████▍ | 265/1189 [18:24<1:13:13, 4.75s/it] + + 22%|█████████████████▍ | 265/1189 [18:24<1:13:13, 4.75s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:23,619 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:25,904 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8957, 'learning_rate': 0.0001572, 'epoch': 0.22} + + 22%|█████████████████▍ | 266/1189 [18:28<1:12:42, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:28,279 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4304, 'learning_rate': 0.0001578, 'epoch': 0.22} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:30,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 22%|█████████████████▌ | 267/1189 [18:33<1:12:07, 4.69s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:32,969 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4978, 'learning_rate': 0.0001584, 'epoch': 0.23} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:35,253 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 23%|█████████████████▌ | 268/1189 [18:38<1:12:13, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:37,633 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:39,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5355, 'learning_rate': 0.000159, 'epoch': 0.23} + 23%|█████████████████▋ | 269/1189 [18:42<1:11:48, 4.68s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:42,223 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:44,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▋ | 270/1189 [18:47<1:11:16, 4.65s/it] + + 23%|█████████████████▋ | 270/1189 [18:47<1:11:16, 4.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:46,825 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:49,006 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6459, 'learning_rate': 0.0001602, 'epoch': 0.23} + + 23%|█████████████████▊ | 271/1189 [18:51<1:10:39, 4.62s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:51,292 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3063, 'learning_rate': 0.0001608, 'epoch': 0.23} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:53,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 23%|█████████████████▊ | 272/1189 [18:56<1:09:40, 4.56s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:55,679 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4559, 'learning_rate': 0.0001614, 'epoch': 0.23} +[WARNING|modeling_utils.py:388] 2022-03-02 16:03:57,782 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 23%|█████████████████▉ | 273/1189 [19:00<1:08:40, 4.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:03:59,988 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:02,155 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|█████████████████▉ | 274/1189 [19:05<1:08:01, 4.46s/it] + + 23%|█████████████████▉ | 274/1189 [19:05<1:08:01, 4.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:04,375 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:06,554 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 275/1189 [19:09<1:07:39, 4.44s/it] + + 23%|██████████████████ | 275/1189 [19:09<1:07:39, 4.44s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:08,766 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:10,887 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████ | 276/1189 [19:13<1:07:05, 4.41s/it] + + 23%|██████████████████ | 276/1189 [19:13<1:07:05, 4.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:13,128 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:15,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 277/1189 [19:18<1:06:40, 4.39s/it] + + 23%|██████████████████▏ | 277/1189 [19:18<1:06:40, 4.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:17,324 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:19,359 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▏ | 278/1189 [19:22<1:05:28, 4.31s/it] + + 23%|██████████████████▏ | 278/1189 [19:22<1:05:28, 4.31s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:21,477 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:23,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 23%|██████████████████▎ | 279/1189 [19:26<1:04:39, 4.26s/it] + + 23%|██████████████████▎ | 279/1189 [19:26<1:04:39, 4.26s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:25,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:27,671 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|██████████████████▎ | 280/1189 [19:30<1:04:08, 4.23s/it] + + 24%|██████████████████▎ | 280/1189 [19:30<1:04:08, 4.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:29,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4222, 'learning_rate': 0.0001662, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:31,833 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|██████████████████▍ | 281/1189 [19:34<1:03:44, 4.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:33,873 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.501, 'learning_rate': 0.0001668, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:35,781 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|██████████████████▍ | 282/1189 [19:38<1:02:28, 4.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:37,866 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9132, 'learning_rate': 0.0001674, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:39,815 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|██████████████████▌ | 283/1189 [19:42<1:01:57, 4.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:41,787 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4118, 'learning_rate': 0.000168, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:43,695 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|██████████████████▋ | 284/1189 [19:46<1:00:51, 4.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:45,646 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6771, 'learning_rate': 0.0001686, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:47,442 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|███████████████████▏ | 285/1189 [19:50<59:30, 3.95s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:49,365 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3535, 'learning_rate': 0.00016919999999999997, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:51,170 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|███████████████████▏ | 286/1189 [19:54<58:26, 3.88s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:53,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5797, 'learning_rate': 0.00016979999999999998, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:54,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▎ | 287/1189 [19:57<57:27, 3.82s/it] + 24%|███████████████████▎ | 287/1189 [19:57<57:27, 3.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:04:56,702 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:04:58,366 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▍ | 288/1189 [20:01<56:00, 3.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:00,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6374, 'learning_rate': 0.00017099999999999998, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:01,803 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|███████████████████▍ | 289/1189 [20:04<54:38, 3.64s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:03,506 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5353, 'learning_rate': 0.00017159999999999997, 'epoch': 0.24} +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:05,113 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 24%|███████████████████▌ | 290/1189 [20:07<53:05, 3.54s/it] + 24%|███████████████████▌ | 290/1189 [20:07<53:05, 3.54s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:06,737 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:08,184 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 24%|███████████████████▌ | 291/1189 [20:11<50:54, 3.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:09,665 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0447, 'learning_rate': 0.00017279999999999997, 'epoch': 0.25} +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:11,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 292/1189 [20:13<48:37, 3.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:12,458 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2653, 'learning_rate': 0.00017339999999999996, 'epoch': 0.25} +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:13,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▋ | 293/1189 [20:16<45:32, 3.05s/it] + 25%|███████████████████▋ | 293/1189 [20:16<45:32, 3.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:14,943 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:16,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 294/1189 [20:18<42:31, 2.85s/it] + 25%|███████████████████▊ | 294/1189 [20:18<42:31, 2.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:17,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:18,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▊ | 295/1189 [20:21<39:33, 2.66s/it] + 25%|███████████████████▊ | 295/1189 [20:21<39:33, 2.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:19,294 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:20,171 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 296/1189 [20:23<36:13, 2.43s/it] + 25%|███████████████████▉ | 296/1189 [20:23<36:13, 2.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:21,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:21,922 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|███████████████████▉ | 297/1189 [20:24<33:09, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:22,747 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.943, 'learning_rate': 0.00017639999999999998, 'epoch': 0.25} +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:23,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 298/1189 [20:26<29:57, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:24,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:24,843 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 299/1189 [20:27<27:10, 1.83s/it] +{'loss': 5.6903, 'learning_rate': 0.00017699999999999997, 'epoch': 0.25} + 25%|████████████████████ | 299/1189 [20:27<27:10, 1.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:25,522 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 16:05:26,581 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▏ | 300/1189 [20:29<26:43, 1.80s/it] + 25%|████████████████████ | 299/1189 [20:27<27:10, 1.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:25,522 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████ | 299/1189 [20:27<27:10, 1.83s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:25,522 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 301/1189 [20:34<42:15, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:25,522 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 301/1189 [20:34<42:15, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:34,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 301/1189 [20:34<42:15, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:34,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8048, 'learning_rate': 0.00017879999999999998, 'epoch': 0.25} + 25%|████████████████████▎ | 301/1189 [20:34<42:15, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:34,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▎ | 301/1189 [20:34<42:15, 2.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:34,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:44<58:29, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:34,462 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:44<58:29, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:44,528 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:44<58:29, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:44,528 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.2932, 'learning_rate': 0.00017999999999999998, 'epoch': 0.26} + 25%|████████████████████▍ | 303/1189 [20:44<58:29, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:44,528 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 25%|████████████████████▍ | 303/1189 [20:44<58:29, 3.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:44,528 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 305/1189 [20:54<1:06:30, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:44,528 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 305/1189 [20:54<1:06:30, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:54,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 305/1189 [20:54<1:06:30, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:54,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5021, 'learning_rate': 0.00018119999999999999, 'epoch': 0.26} + 26%|████████████████████ | 305/1189 [20:54<1:06:30, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:54,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████ | 305/1189 [20:54<1:06:30, 4.51s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:54,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:04<1:09:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:05:54,599 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:04<1:09:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:04,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:04<1:09:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:04,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3651, 'learning_rate': 0.0001824, 'epoch': 0.26} + 26%|████████████████████▏ | 307/1189 [21:04<1:09:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:04,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▏ | 307/1189 [21:04<1:09:15, 4.71s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:04,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 309/1189 [21:14<1:09:56, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:04,350 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 309/1189 [21:14<1:09:56, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:14,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 309/1189 [21:14<1:09:56, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:14,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▎ | 309/1189 [21:14<1:09:56, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:14,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0125, 'learning_rate': 0.0001836, 'epoch': 0.26} + 26%|████████████████████▎ | 309/1189 [21:14<1:09:56, 4.77s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:14,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 311/1189 [21:24<1:10:53, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:14,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 311/1189 [21:24<1:10:53, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:23,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 311/1189 [21:24<1:10:53, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:23,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▍ | 311/1189 [21:24<1:10:53, 4.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:23,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.349, 'learning_rate': 0.0001848, 'epoch': 0.26} + [WARNING|modeling_utils.py:388] 2022-03-02 16:06:23,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 16:06:23,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 313/1189 [21:33<1:10:08, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:23,806 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 313/1189 [21:33<1:10:08, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:33,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 313/1189 [21:33<1:10:08, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:33,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9448, 'learning_rate': 0.000186, 'epoch': 0.26} + 26%|████████████████████▌ | 313/1189 [21:33<1:10:08, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:33,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▌ | 313/1189 [21:33<1:10:08, 4.80s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:33,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:33,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6354, 'learning_rate': 0.0001872, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8862, 'learning_rate': 0.00018779999999999998, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6156, 'learning_rate': 0.00018839999999999997, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5698, 'learning_rate': 0.00018899999999999999, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.705, 'learning_rate': 0.00018959999999999997, 'epoch': 0.27} + 26%|███████████████████��▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5264, 'learning_rate': 0.0001902, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4682, 'learning_rate': 0.00019079999999999998, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3161, 'learning_rate': 0.0001914, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4899, 'learning_rate': 0.00019199999999999998, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4182, 'learning_rate': 0.0001926, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.218, 'learning_rate': 0.00019319999999999998, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8207, 'learning_rate': 0.0001938, 'epoch': 0.27} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8061, 'learning_rate': 0.00019439999999999998, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4415, 'learning_rate': 0.000195, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8382, 'learning_rate': 0.00019559999999999998, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.598, 'learning_rate': 0.0001962, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.849, 'learning_rate': 0.00019679999999999999, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5554, 'learning_rate': 0.0001974, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5958, 'learning_rate': 0.000198, 'epoch': 0.28} + 26%|█��██████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.911, 'learning_rate': 0.0001986, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8719, 'learning_rate': 0.0001992, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4203, 'learning_rate': 0.0001998, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7382, 'learning_rate': 0.0002004, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7495, 'learning_rate': 0.000201, 'epoch': 0.28} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9017, 'learning_rate': 0.0002016, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8554, 'learning_rate': 0.0002022, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0008, 'learning_rate': 0.0002028, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5591, 'learning_rate': 0.00020339999999999998, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0833, 'learning_rate': 0.000204, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9958, 'learning_rate': 0.00020459999999999999, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.573, 'learning_rate': 0.0002052, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7894, 'learning_rate': 0.0002058, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.119, 'learning_rate': 0.00020639999999999998, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0909, 'learning_rate': 0.00020699999999999996, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8193, 'learning_rate': 0.00020759999999999998, 'epoch': 0.29} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7541, 'learning_rate': 0.00020819999999999996, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8195, 'learning_rate': 0.00020879999999999998, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.393, 'learning_rate': 0.00020939999999999997, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7012, 'learning_rate': 0.00020999999999999998, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5452, 'learning_rate': 0.00021059999999999997, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9311, 'learning_rate': 0.00021119999999999996, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8398, 'learning_rate': 0.00021179999999999997, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4316, 'learning_rate': 0.00021239999999999996, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6476, 'learning_rate': 0.00021299999999999997, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.811, 'learning_rate': 0.00021359999999999996, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5928, 'learning_rate': 0.00021419999999999998, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8394, 'learning_rate': 0.00021479999999999996, 'epoch': 0.3} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5888, 'learning_rate': 0.00021539999999999998, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5328, 'learning_rate': 0.00021599999999999996, 'epoch': 0.31} + 26%|█████���██████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7895, 'learning_rate': 0.00021659999999999998, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.204, 'learning_rate': 0.00021719999999999997, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6412, 'learning_rate': 0.00021779999999999998, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.077, 'learning_rate': 0.00021839999999999997, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0518, 'learning_rate': 0.00021899999999999998, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9851, 'learning_rate': 0.00021959999999999997, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8694, 'learning_rate': 0.00022019999999999999, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9845, 'learning_rate': 0.00022079999999999997, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8001, 'learning_rate': 0.0002214, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4582, 'learning_rate': 0.00022199999999999998, 'epoch': 0.31} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6111, 'learning_rate': 0.0002226, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7382, 'learning_rate': 0.00022319999999999998, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5815, 'learning_rate': 0.0002238, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.548, 'learning_rate': 0.00022439999999999998, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7013, 'learning_rate': 0.000225, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8199, 'learning_rate': 0.00022559999999999998, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5677, 'learning_rate': 0.00022619999999999997, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6306, 'learning_rate': 0.00022679999999999998, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4638, 'learning_rate': 0.00022739999999999997, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7158, 'learning_rate': 0.00022799999999999999, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9784, 'learning_rate': 0.00022859999999999997, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6348, 'learning_rate': 0.0002292, 'epoch': 0.32} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8384, 'learning_rate': 0.00022979999999999997, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.793, 'learning_rate': 0.0002304, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9213, 'learning_rate': 0.00023099999999999998, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6966, 'learning_rate': 0.0002316, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7374, 'learning_rate': 0.00023219999999999998, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1651, 'learning_rate': 0.0002328, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6255, 'learning_rate': 0.00023339999999999998, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8594, 'learning_rate': 0.000234, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5503, 'learning_rate': 0.00023459999999999998, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6112, 'learning_rate': 0.0002352, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6174, 'learning_rate': 0.00023579999999999999, 'epoch': 0.33} +{'loss': 5.3497, 'learning_rate': 0.0002364, 'epoch': 0.33} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2984, 'learning_rate': 0.000237, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9087, 'learning_rate': 0.0002376, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9715, 'learning_rate': 0.0002382, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7359, 'learning_rate': 0.0002388, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.659, 'learning_rate': 0.0002394, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7424, 'learning_rate': 0.00023999999999999998, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9856, 'learning_rate': 0.0002406, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9682, 'learning_rate': 0.00024119999999999998, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9717, 'learning_rate': 0.0002418, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3111, 'learning_rate': 0.00024239999999999998, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4094, 'learning_rate': 0.000243, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7539, 'learning_rate': 0.00024359999999999999, 'epoch': 0.34} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4262, 'learning_rate': 0.00024419999999999997, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|███████████████████��▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.643, 'learning_rate': 0.0002448, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1179, 'learning_rate': 0.00024539999999999995, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4305, 'learning_rate': 0.00024599999999999996, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0687, 'learning_rate': 0.0002466, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0495, 'learning_rate': 0.0002472, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4025, 'learning_rate': 0.00024779999999999995, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7475, 'learning_rate': 0.00024839999999999997, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9321, 'learning_rate': 0.000249, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8359, 'learning_rate': 0.00024959999999999994, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2366, 'learning_rate': 0.00025019999999999996, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6573, 'learning_rate': 0.00025079999999999997, 'epoch': 0.35} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9931, 'learning_rate': 0.0002514, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5645, 'learning_rate': 0.00025199999999999995, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6328, 'learning_rate': 0.00025259999999999996, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5977, 'learning_rate': 0.0002532, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4475, 'learning_rate': 0.0002538, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5936, 'learning_rate': 0.00025439999999999995, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7865, 'learning_rate': 0.00025499999999999996, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2149, 'learning_rate': 0.0002556, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8945, 'learning_rate': 0.0002562, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.814, 'learning_rate': 0.00025679999999999995, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7622, 'learning_rate': 0.00025739999999999997, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6538, 'learning_rate': 0.000258, 'epoch': 0.36} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7093, 'learning_rate': 0.0002586, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4731, 'learning_rate': 0.00025919999999999996, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4726, 'learning_rate': 0.00025979999999999997, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4415, 'learning_rate': 0.0002604, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8381, 'learning_rate': 0.000261, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6304, 'learning_rate': 0.00026159999999999996, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9751, 'learning_rate': 0.0002622, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.934, 'learning_rate': 0.0002628, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6406, 'learning_rate': 0.00026339999999999995, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6753, 'learning_rate': 0.00026399999999999997, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4777, 'learning_rate': 0.0002646, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2458, 'learning_rate': 0.0002652, 'epoch': 0.37} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2422, 'learning_rate': 0.00026579999999999996, 'epoch': 0.38} + 26%|████████████████████▋ | 315/1189 [21:43<1:08:51, 4.73s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8309, 'learning_rate': 0.00026639999999999997, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6301, 'learning_rate': 0.0002676, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5924, 'learning_rate': 0.00026819999999999996, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4359, 'learning_rate': 0.0002688, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6921, 'learning_rate': 0.0002694, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9924, 'learning_rate': 0.00027, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9157, 'learning_rate': 0.00027059999999999996, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7934, 'learning_rate': 0.0002712, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9495, 'learning_rate': 0.0002718, 'epoch': 0.38} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7351, 'learning_rate': 0.0002724, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8521, 'learning_rate': 0.00027299999999999997, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8954, 'learning_rate': 0.0002736, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1091, 'learning_rate': 0.0002742, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6737, 'learning_rate': 0.0002748, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6774, 'learning_rate': 0.00027539999999999997, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1294, 'learning_rate': 0.000276, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9277, 'learning_rate': 0.0002766, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7534, 'learning_rate': 0.0002772, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7519, 'learning_rate': 0.0002778, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6761, 'learning_rate': 0.0002784, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6486, 'learning_rate': 0.000279, 'epoch': 0.39} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.946, 'learning_rate': 0.00027959999999999997, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5311, 'learning_rate': 0.0002802, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8559, 'learning_rate': 0.0002808, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6196, 'learning_rate': 0.00028139999999999996, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7566, 'learning_rate': 0.00028199999999999997, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8195, 'learning_rate': 0.0002826, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0072, 'learning_rate': 0.00028319999999999994, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3395, 'learning_rate': 0.00028379999999999996, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.8089, 'learning_rate': 0.0002844, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0194, 'learning_rate': 0.000285, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1517, 'learning_rate': 0.00028559999999999995, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4459, 'learning_rate': 0.00028619999999999996, 'epoch': 0.4} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7189, 'learning_rate': 0.0002868, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2612, 'learning_rate': 0.00028739999999999994, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9748, 'learning_rate': 0.00028799999999999995, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.1632, 'learning_rate': 0.00028859999999999997, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7419, 'learning_rate': 0.0002892, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.4433, 'learning_rate': 0.00028979999999999994, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9207, 'learning_rate': 0.00029039999999999996, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3683, 'learning_rate': 0.00029099999999999997, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.5459, 'learning_rate': 0.0002916, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3484, 'learning_rate': 0.00029219999999999995, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3329, 'learning_rate': 0.00029279999999999996, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4753, 'learning_rate': 0.0002934, 'epoch': 0.41} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.0896, 'learning_rate': 0.000294, 'epoch': 0.42} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.4255, 'learning_rate': 0.00029459999999999995, 'epoch': 0.42} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2874, 'learning_rate': 0.00029519999999999997, 'epoch': 0.42} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.6805, 'learning_rate': 0.0002958, 'epoch': 0.42} +{'loss': 4.6878, 'learning_rate': 0.0002964, 'epoch': 0.42} + 38%|██████████████████████████████▎ | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.853, 'learning_rate': 0.00029759999999999997, 'epoch': 0.42} +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/02/2022 16:32:15 - INFO - datasets.metric - Removing /home/sanchit_huggingface_co/.cache/huggingface/metrics/wer/default/default_experiment-1-0.arrow +{'eval_loss': 5.1412506103515625, 'eval_wer': 1.9614042046806823, 'eval_runtime': 781.4958, 'eval_samples_per_second': 3.381, 'eval_steps_per_second': 0.283, 'epoch': 0.42} +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 16:19:14,449 >> Num examples = 2642 | 450/1189 [30:53<23:47, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 16:06:42,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/02/2022 16:34:03 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20220302_154455-17zs7rwf/run-17zs7rwf.wandb']. This may take a bit of time if the files are large.