diff --git "a/wandb/run-20220302_094439-2kys49al/files/output.log" "b/wandb/run-20220302_094439-2kys49al/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20220302_094439-2kys49al/files/output.log" @@ -0,0 +1,1295 @@ + + 0%| | 0/10701 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.6092, 'learning_rate': 6.000000000000001e-08, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:43,577 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/10701 [00:02<8:17:45, 2.79s/it] + 0%| | 1/10701 [00:02<8:17:45, 2.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:44:44,936 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:46,010 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 2/10701 [00:05<7:26:39, 2.50s/it] + 0%| | 2/10701 [00:05<7:26:39, 2.50s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:44:47,245 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:48,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 3/10701 [00:07<7:10:19, 2.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:44:49,490 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:50,553 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 4/10701 [00:09<6:57:56, 2.34s/it] + 0%| | 4/10701 [00:09<6:57:56, 2.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:44:51,753 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:52,834 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 5/10701 [00:11<6:53:45, 2.32s/it] + 0%| | 5/10701 [00:11<6:53:45, 2.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:44:54,015 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:55,082 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.5912, 'learning_rate': 3.6e-07, 'epoch': 0.0} + 0%| | 6/10701 [00:14<6:49:17, 2.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:44:56,239 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.5078, 'learning_rate': 4.2e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:57,310 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 7/10701 [00:16<6:45:16, 2.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:44:58,455 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.5746, 'learning_rate': 4.800000000000001e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 09:44:59,496 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 8/10701 [00:18<6:40:17, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:00,666 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:01,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 9/10701 [00:20<6:38:39, 2.24s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:02,857 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.4953, 'learning_rate': 5.4e-07, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:03,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 10/10701 [00:23<6:36:34, 2.23s/it] + 0%| | 10/10701 [00:23<6:36:34, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:05,661 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:06,680 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.4469, 'learning_rate': 6.599999999999999e-07, 'epoch': 0.0} +{'loss': 10.4409, 'learning_rate': 7.2e-07, 'epoch': 0.0} + 0%| | 11/10701 [00:25<7:05:59, 2.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:07,838 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:08,889 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 12/10701 [00:27<6:56:09, 2.34s/it] + 0%| | 12/10701 [00:27<6:56:09, 2.34s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:10,046 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:11,064 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 13/10701 [00:30<6:47:31, 2.29s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:12,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:13,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 14/10701 [00:32<6:40:48, 2.25s/it] + 0%| | 14/10701 [00:32<6:40:48, 2.25s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:14,395 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:15,422 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.2975, 'learning_rate': 9e-07, 'epoch': 0.0} + 0%| | 15/10701 [00:34<6:37:34, 2.23s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:16,513 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:17,524 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 16/10701 [00:36<6:30:33, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:18,640 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.3363, 'learning_rate': 9.600000000000001e-07, 'epoch': 0.0} +{'loss': 10.2833, 'learning_rate': 1.0200000000000002e-06, 'epoch': 0.0} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:19,647 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 17/10701 [00:38<6:26:50, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:20,796 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:21,804 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 18/10701 [00:40<6:25:56, 2.17s/it] + 0%|▏ | 18/10701 [00:40<6:25:56, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:22,934 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:23,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 19/10701 [00:43<6:24:12, 2.16s/it] + 0%|▏ | 19/10701 [00:43<6:24:12, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:25,071 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:26,054 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.1454, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.01} + 0%|▏ | 20/10701 [00:45<6:21:56, 2.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:27,164 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:28,142 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.0815, 'learning_rate': 1.26e-06, 'epoch': 0.01} + 0%|▏ | 21/10701 [00:47<6:18:42, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:29,221 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.1507, 'learning_rate': 1.3199999999999999e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:30,197 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 22/10701 [00:49<6:14:40, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:31,288 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:32,261 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + + 0%|▏ | 23/10701 [00:51<6:12:34, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:33,334 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.0909, 'learning_rate': 1.38e-06, 'epoch': 0.01} +{'loss': 10.0759, 'learning_rate': 1.44e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:34,307 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 24/10701 [00:53<6:09:53, 2.08s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:35,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.0475, 'learning_rate': 1.5e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:36,344 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 25/10701 [00:55<6:07:43, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:37,424 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:38,391 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 26/10701 [00:57<6:06:37, 2.06s/it] + + 0%|▏ | 26/10701 [00:57<6:06:37, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:39,468 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.0029, 'learning_rate': 1.62e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:40,431 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 27/10701 [00:59<6:05:33, 2.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:41,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:42,441 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 28/10701 [01:01<6:02:58, 2.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:43,482 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 10.0417, 'learning_rate': 1.68e-06, 'epoch': 0.01} +{'loss': 9.8293, 'learning_rate': 1.74e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:44,410 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 29/10701 [01:03<5:59:15, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:45,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:46,388 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.9804, 'learning_rate': 1.8e-06, 'epoch': 0.01} + 0%|▏ | 30/10701 [01:05<5:56:52, 2.01s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:47,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.8897, 'learning_rate': 1.86e-06, 'epoch': 0.01} +{'loss': 9.7528, 'learning_rate': 1.9200000000000003e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:48,421 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 31/10701 [01:07<5:58:21, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:49,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:50,450 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▏ | 32/10701 [01:09<5:59:04, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:51,492 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:52,380 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.9566, 'learning_rate': 1.98e-06, 'epoch': 0.01} + 0%|▏ | 33/10701 [01:11<5:54:15, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:53,382 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:54,265 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.7642, 'learning_rate': 2.0400000000000004e-06, 'epoch': 0.01} +{'loss': 9.8169, 'learning_rate': 2.1000000000000002e-06, 'epoch': 0.01} + 0%|▏ | 34/10701 [01:13<5:48:37, 1.96s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:55,260 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:56,132 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 35/10701 [01:15<5:43:23, 1.93s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:57,086 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:57,945 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 36/10701 [01:17<5:37:06, 1.90s/it] +{'loss': 9.678, 'learning_rate': 2.16e-06, 'epoch': 0.01} + 0%|▎ | 36/10701 [01:17<5:37:06, 1.90s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:45:58,896 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:45:59,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 37/10701 [01:18<5:31:26, 1.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:00,643 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.6785, 'learning_rate': 2.28e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:01,437 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 38/10701 [01:20<5:22:42, 1.82s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:02,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.8736, 'learning_rate': 2.34e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:03,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 39/10701 [01:22<5:16:57, 1.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:04,059 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.6238, 'learning_rate': 2.4000000000000003e-06, 'epoch': 0.01} +{'loss': 9.6723, 'learning_rate': 2.46e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:04,849 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 40/10701 [01:23<5:12:29, 1.76s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:05,715 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:06,484 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 41/10701 [01:25<5:06:03, 1.72s/it] + 0%|▎ | 41/10701 [01:25<5:06:03, 1.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:07,377 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:08,126 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 42/10701 [01:27<5:01:34, 1.70s/it] + 0%|▎ | 42/10701 [01:27<5:01:34, 1.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:08,970 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:09,691 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 43/10701 [01:28<4:54:37, 1.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:10,494 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.5222, 'learning_rate': 2.6399999999999997e-06, 'epoch': 0.01} +{'loss': 9.7702, 'learning_rate': 2.7e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:11,151 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 44/10701 [01:30<4:43:49, 1.60s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:11,877 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:12,512 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 45/10701 [01:31<4:31:22, 1.53s/it] + 0%|▎ | 45/10701 [01:31<4:31:22, 1.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:13,233 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:13,832 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 46/10701 [01:32<4:20:15, 1.47s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:14,498 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:15,030 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.5447, 'learning_rate': 2.82e-06, 'epoch': 0.01} +{'loss': 9.5612, 'learning_rate': 2.88e-06, 'epoch': 0.01} + 0%|▎ | 47/10701 [01:34<4:05:54, 1.38s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:15,644 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:16,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 48/10701 [01:35<3:50:51, 1.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:16,713 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:17,161 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 49/10701 [01:36<3:36:33, 1.22s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:17,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:18,756 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1771, 'learning_rate': 2.9400000000000002e-06, 'epoch': 0.01} + 0%|▎ | 50/10701 [01:37<3:56:26, 1.33s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:20,074 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.3124, 'learning_rate': 3e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:21,175 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▎ | 51/10701 [01:40<4:54:22, 1.66s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:22,369 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.6376, 'learning_rate': 3.06e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:23,446 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▍ | 52/10701 [01:42<5:26:56, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:24,635 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.3595, 'learning_rate': 3.1199999999999998e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:25,739 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%|▍ | 53/10701 [01:44<5:50:48, 1.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:26,911 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.4499, 'learning_rate': 3.18e-06, 'epoch': 0.01} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:27,995 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 54/10701 [01:47<6:05:56, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:29,219 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.4092, 'learning_rate': 3.24e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:30,271 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 55/10701 [01:49<6:16:54, 2.12s/it] + 1%|▍ | 55/10701 [01:49<6:16:54, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:31,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:32,489 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 56/10701 [01:51<6:21:53, 2.15s/it] + 1%|▍ | 56/10701 [01:51<6:21:53, 2.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:33,659 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:34,776 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 57/10701 [01:53<6:28:57, 2.19s/it] + 1%|▍ | 57/10701 [01:53<6:28:57, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:35,940 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:37,000 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 58/10701 [01:56<6:30:40, 2.20s/it] + 1%|▍ | 58/10701 [01:56<6:30:40, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:38,160 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:39,231 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 59/10701 [01:58<6:32:16, 2.21s/it] + 1%|▍ | 59/10701 [01:58<6:32:16, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:40,383 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:41,412 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 60/10701 [02:00<6:30:41, 2.20s/it] + 1%|▍ | 60/10701 [02:00<6:30:41, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:42,580 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:43,638 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1439, 'learning_rate': 3.66e-06, 'epoch': 0.02} + 1%|▍ | 61/10701 [02:02<6:31:40, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:44,798 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.3793, 'learning_rate': 3.72e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:45,847 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 62/10701 [02:04<6:31:42, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:46,992 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:48,017 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 63/10701 [02:07<6:29:34, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:49,145 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.6052, 'learning_rate': 3.7800000000000002e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:50,154 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 64/10701 [02:09<6:26:14, 2.18s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:51,280 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.2966, 'learning_rate': 3.8400000000000005e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:52,316 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 65/10701 [02:11<6:25:15, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:53,423 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.3686, 'learning_rate': 3.9e-06, 'epoch': 0.02} +{'loss': 9.1426, 'learning_rate': 3.96e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:54,427 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 66/10701 [02:13<6:21:57, 2.15s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:55,576 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.3207, 'learning_rate': 4.0200000000000005e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:56,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 67/10701 [02:15<6:22:19, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:57,731 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1614, 'learning_rate': 4.080000000000001e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:46:58,764 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▍ | 68/10701 [02:17<6:23:22, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:46:59,867 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.2762, 'learning_rate': 4.14e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:00,841 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 69/10701 [02:19<6:18:36, 2.14s/it] + 1%|▌ | 69/10701 [02:19<6:18:36, 2.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:01,946 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:02,962 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 70/10701 [02:22<6:17:58, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:04,135 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:05,125 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 71/10701 [02:24<6:19:17, 2.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:06,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.157, 'learning_rate': 4.26e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:07,220 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 72/10701 [02:26<6:16:52, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:08,298 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.4008, 'learning_rate': 4.32e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:09,285 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 73/10701 [02:28<6:13:32, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:10,372 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.297, 'learning_rate': 4.3799999999999996e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:11,355 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 74/10701 [02:30<6:16:29, 2.13s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:12,520 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.3809, 'learning_rate': 4.44e-06, 'epoch': 0.02} +{'loss': 9.257, 'learning_rate': 4.5e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:13,477 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 75/10701 [02:32<6:11:10, 2.10s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:14,518 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:15,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 76/10701 [02:34<6:06:05, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:16,545 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.0765, 'learning_rate': 4.56e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:17,506 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 77/10701 [02:36<6:03:57, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:18,587 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.5525, 'learning_rate': 4.62e-06, 'epoch': 0.02} +{'loss': 9.1723, 'learning_rate': 4.68e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:19,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 78/10701 [02:38<6:02:44, 2.05s/it] + 1%|▌ | 78/10701 [02:38<6:02:44, 2.05s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:20,588 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:21,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 79/10701 [02:40<6:00:44, 2.04s/it] + 1%|▌ | 79/10701 [02:40<6:00:44, 2.04s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:22,613 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:23,544 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 80/10701 [02:42<5:58:16, 2.02s/it] + 1%|▌ | 80/10701 [02:42<5:58:16, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:24,603 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:25,541 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 81/10701 [02:44<5:56:53, 2.02s/it] + 1%|▌ | 81/10701 [02:44<5:56:53, 2.02s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:26,579 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:27,478 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 82/10701 [02:46<5:52:42, 1.99s/it] + 1%|▌ | 82/10701 [02:46<5:52:42, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:28,505 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:29,417 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 83/10701 [02:48<5:49:36, 1.98s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:30,447 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1196, 'learning_rate': 5.04e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:31,356 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 84/10701 [02:50<5:47:42, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:32,342 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.025, 'learning_rate': 5.1e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:33,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▌ | 85/10701 [02:52<5:44:06, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:34,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1792, 'learning_rate': 5.16e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:35,106 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 86/10701 [02:54<5:39:21, 1.92s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:36,085 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1091, 'learning_rate': 5.22e-06, 'epoch': 0.02} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:36,931 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 87/10701 [02:56<5:34:15, 1.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:37,884 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:38,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.9929, 'learning_rate': 5.279999999999999e-06, 'epoch': 0.02} + 1%|▋ | 88/10701 [02:57<5:29:22, 1.86s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:39,678 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:40,507 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.0523, 'learning_rate': 5.34e-06, 'epoch': 0.02} + 1%|▋ | 89/10701 [02:59<5:24:44, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:41,439 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:42,240 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 90/10701 [03:01<5:19:29, 1.81s/it] + 1%|▋ | 90/10701 [03:01<5:19:29, 1.81s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:43,174 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:43,949 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 91/10701 [03:03<5:14:09, 1.78s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:44,799 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.8228, 'learning_rate': 5.46e-06, 'epoch': 0.03} +{'loss': 9.01, 'learning_rate': 5.52e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:45,532 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 92/10701 [03:04<5:03:52, 1.72s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:46,347 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1774, 'learning_rate': 5.58e-06, 'epoch': 0.03} +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:47,025 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 93/10701 [03:06<4:51:57, 1.65s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:47,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:48,466 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 94/10701 [03:07<4:40:42, 1.59s/it] +{'loss': 8.7715, 'learning_rate': 5.64e-06, 'epoch': 0.03} + 1%|▋ | 94/10701 [03:07<4:40:42, 1.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:49,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:49,859 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 95/10701 [03:08<4:30:24, 1.53s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:50,578 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 94/10701 [03:07<4:40:42, 1.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:49,255 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 96/10701 [03:10<4:17:48, 1.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:51,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:52,396 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 97/10701 [03:11<4:06:23, 1.39s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:53,027 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 96/10701 [03:10<4:17:48, 1.46s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:51,855 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.0813, 'learning_rate': 5.82e-06, 'epoch': 0.03} + 1%|▋ | 98/10701 [03:12<3:52:43, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:54,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:47:54,621 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 99/10701 [03:13<3:40:27, 1.25s/it] + 1%|▋ | 98/10701 [03:12<3:52:43, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:54,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 100/10701 [03:15<4:00:48, 1.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:54,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 100/10701 [03:15<4:00:48, 1.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:54,156 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 100/10701 [03:15<4:00:48, 1.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:57,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 100/10701 [03:15<4:00:48, 1.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:57,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.1754, 'learning_rate': 6.0600000000000004e-06, 'epoch': 0.03} + 1%|▋ | 100/10701 [03:15<4:00:48, 1.36s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:47:57,565 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 102/10701 [03:19<5:26:42, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:02,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 102/10701 [03:19<5:26:42, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:02,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.0879, 'learning_rate': 6.18e-06, 'epoch': 0.03} + 1%|▋ | 102/10701 [03:19<5:26:42, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:02,093 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 104/10701 [03:24<6:04:46, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:06,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▋ | 104/10701 [03:24<6:04:46, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:06,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 9.124, 'learning_rate': 6.3e-06, 'epoch': 0.03} + 1%|▋ | 104/10701 [03:24<6:04:46, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:06,637 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 106/10701 [03:29<6:21:46, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:11,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 106/10701 [03:29<6:21:46, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:11,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 106/10701 [03:29<6:21:46, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:11,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 108/10701 [03:33<6:26:08, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:11,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 108/10701 [03:33<6:26:08, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:11,144 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.9893, 'learning_rate': 6.48e-06, 'epoch': 0.03} + 1%|▊ | 108/10701 [03:33<6:26:08, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:15,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 108/10701 [03:33<6:26:08, 2.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:15,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 110/10701 [03:37<6:29:45, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:15,574 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 110/10701 [03:37<6:29:45, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:20,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 110/10701 [03:37<6:29:45, 2.21s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:20,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 112/10701 [03:42<6:27:38, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:20,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 112/10701 [03:42<6:27:38, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:20,034 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.8539, 'learning_rate': 6.72e-06, 'epoch': 0.03} + 1%|▊ | 112/10701 [03:42<6:27:38, 2.20s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:24,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 114/10701 [03:46<6:23:28, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:24,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 114/10701 [03:46<6:23:28, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:24,376 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 114/10701 [03:46<6:23:28, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:28,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 114/10701 [03:46<6:23:28, 2.17s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:28,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 116/10701 [03:50<6:20:58, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:28,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 116/10701 [03:50<6:20:58, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:28,687 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 116/10701 [03:50<6:20:58, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:32,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 116/10701 [03:50<6:20:58, 2.16s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:32,966 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 118/10701 [03:55<6:17:14, 2.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:37,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 118/10701 [03:55<6:17:14, 2.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:37,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.9842, 'learning_rate': 7.08e-06, 'epoch': 0.03} + 1%|▊ | 118/10701 [03:55<6:17:14, 2.14s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:37,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 120/10701 [03:59<6:13:50, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:37,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▊ | 120/10701 [03:59<6:13:50, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:37,179 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.984, 'learning_rate': 7.2e-06, 'epoch': 0.03} + 1%|▊ | 120/10701 [03:59<6:13:50, 2.12s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:41,379 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 122/10701 [04:03<6:12:51, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:41,379 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 122/10701 [04:03<6:12:51, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:41,379 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.6948, 'learning_rate': 7.32e-06, 'epoch': 0.03} + 1%|▉ | 122/10701 [04:03<6:12:51, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:45,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 122/10701 [04:03<6:12:51, 2.11s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:45,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 124/10701 [04:07<6:09:05, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:45,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 124/10701 [04:07<6:09:05, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:45,593 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 124/10701 [04:07<6:09:05, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:49,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 124/10701 [04:07<6:09:05, 2.09s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:49,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 126/10701 [04:11<6:04:30, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:49,733 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 126/10701 [04:11<6:04:30, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:53,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 126/10701 [04:11<6:04:30, 2.07s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:53,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 128/10701 [04:15<5:57:29, 2.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:53,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 128/10701 [04:15<5:57:29, 2.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:53,795 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 128/10701 [04:15<5:57:29, 2.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:57,763 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 128/10701 [04:15<5:57:29, 2.03s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:48:57,763 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 130/10701 [04:19<5:51:20, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:01,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 130/10701 [04:19<5:51:20, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:01,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.5824, 'learning_rate': 7.8e-06, 'epoch': 0.04} + 1%|▉ | 130/10701 [04:19<5:51:20, 1.99s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:01,648 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 132/10701 [04:23<5:47:11, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:05,552 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 132/10701 [04:23<5:47:11, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:05,552 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.5085, 'learning_rate': 7.92e-06, 'epoch': 0.04} + 1%|▉ | 132/10701 [04:23<5:47:11, 1.97s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:05,552 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.6462, 'learning_rate': 7.98e-06, 'epoch': 0.04} + 1%|▉ | 134/10701 [04:27<5:41:21, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:09,329 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 134/10701 [04:27<5:41:21, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:09,329 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 134/10701 [04:27<5:41:21, 1.94s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:09,329 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 136/10701 [04:31<5:33:29, 1.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:13,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 136/10701 [04:31<5:33:29, 1.89s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:13,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 138/10701 [04:34<5:25:30, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:13,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 138/10701 [04:34<5:25:30, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:13,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.7108, 'learning_rate': 8.220000000000001e-06, 'epoch': 0.04} + 1%|▉ | 138/10701 [04:34<5:25:30, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:16,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|▉ | 138/10701 [04:34<5:25:30, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:16,629 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 140/10701 [04:38<5:15:07, 1.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:20,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 140/10701 [04:38<5:15:07, 1.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:20,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.6197, 'learning_rate': 8.400000000000001e-06, 'epoch': 0.04} + 1%|█ | 140/10701 [04:38<5:15:07, 1.79s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:20,062 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 142/10701 [04:41<4:59:33, 1.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:23,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 142/10701 [04:41<4:59:33, 1.70s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:23,189 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.3621, 'learning_rate': 8.52e-06, 'epoch': 0.04} + 1%|█ | 144/10701 [04:44<4:32:31, 1.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:25,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 144/10701 [04:44<4:32:31, 1.55s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:25,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 146/10701 [04:46<4:07:51, 1.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:25,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 146/10701 [04:46<4:07:51, 1.41s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:25,952 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.7401, 'learning_rate': 8.7e-06, 'epoch': 0.04} + 1%|█ | 148/10701 [04:49<3:43:06, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:28,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 148/10701 [04:49<3:43:06, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:28,449 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.5904, 'learning_rate': 8.82e-06, 'epoch': 0.04} + 1%|█ | 148/10701 [04:49<3:43:06, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:30,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 148/10701 [04:49<3:43:06, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:30,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 150/10701 [04:51<3:48:03, 1.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:30,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 150/10701 [04:51<3:48:03, 1.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:30,650 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7839, 'learning_rate': 9e-06, 'epoch': 0.04} + 1%|█ | 150/10701 [04:51<3:48:03, 1.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:33,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 150/10701 [04:51<3:48:03, 1.30s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:33,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 152/10701 [04:56<5:23:50, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:33,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 152/10701 [04:56<5:23:50, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:33,944 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 152/10701 [04:56<5:23:50, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 152/10701 [04:56<5:23:50, 1.84s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.4699, 'learning_rate': 9.3e-06, 'epoch': 0.04} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.4494, 'learning_rate': 9.36e-06, 'epoch': 0.04} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.4977, 'learning_rate': 9.42e-06, 'epoch': 0.04} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2952, 'learning_rate': 9.48e-06, 'epoch': 0.04} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.5739, 'learning_rate': 9.54e-06, 'epoch': 0.04} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.4479, 'learning_rate': 9.600000000000001e-06, 'epoch': 0.04} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2847, 'learning_rate': 9.66e-06, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.4738, 'learning_rate': 9.72e-06, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.3257, 'learning_rate': 9.780000000000001e-06, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.343, 'learning_rate': 9.84e-06, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2969, 'learning_rate': 9.9e-06, 'epoch': 0.05} +{'loss': 8.467, 'learning_rate': 9.960000000000001e-06, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.3063, 'learning_rate': 1.002e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.1785, 'learning_rate': 1.008e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.223, 'learning_rate': 1.0140000000000001e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.3273, 'learning_rate': 1.02e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.3348, 'learning_rate': 1.0260000000000002e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2142, 'learning_rate': 1.032e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.3112, 'learning_rate': 1.0379999999999999e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.499, 'learning_rate': 1.044e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2831, 'learning_rate': 1.05e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2228, 'learning_rate': 1.0559999999999999e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2267, 'learning_rate': 1.062e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.1144, 'learning_rate': 1.068e-05, 'epoch': 0.05} +{'loss': 8.2748, 'learning_rate': 1.074e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.1962, 'learning_rate': 1.08e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.2714, 'learning_rate': 1.086e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9911, 'learning_rate': 1.092e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.0264, 'learning_rate': 1.098e-05, 'epoch': 0.05} + 1%|█ | 154/10701 [05:01<6:02:49, 2.06s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.1794, 'learning_rate': 1.104e-05, 'epoch': 0.05} + [WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.0667, 'learning_rate': 1.116e-05, 'epoch': 0.05} +{'loss': 8.1043, 'learning_rate': 1.1220000000000001e-05, 'epoch': 0.05} + [WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9771, 'learning_rate': 1.128e-05, 'epoch': 0.05} + [WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9608, 'learning_rate': 1.134e-05, 'epoch': 0.05} + [WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8684, 'learning_rate': 1.1400000000000001e-05, 'epoch': 0.05} + [WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 192/10701 [06:17<4:45:37, 1.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 192/10701 [06:17<4:45:37, 1.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7387, 'learning_rate': 1.152e-05, 'epoch': 0.05} +{'loss': 7.7955, 'learning_rate': 1.1580000000000001e-05, 'epoch': 0.05} + 2%|█▍ | 192/10701 [06:17<4:45:37, 1.63s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 195/10701 [06:21<4:09:08, 1.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▍ | 195/10701 [06:21<4:09:08, 1.42s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8601, 'learning_rate': 1.1700000000000001e-05, 'epoch': 0.05} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:04,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:04,475 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6168, 'learning_rate': 1.182e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.008, 'learning_rate': 1.1940000000000001e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6062, 'learning_rate': 1.2e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.0935, 'learning_rate': 1.2060000000000001e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.1204, 'learning_rate': 1.2120000000000001e-05, 'epoch': 0.06} +{'loss': 7.9509, 'learning_rate': 1.2180000000000002e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.0621, 'learning_rate': 1.224e-05, 'epoch': 0.06} +{'loss': 7.9402, 'learning_rate': 1.2299999999999999e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9452, 'learning_rate': 1.236e-05, 'epoch': 0.06} +{'loss': 7.6453, 'learning_rate': 1.242e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.0714, 'learning_rate': 1.2479999999999999e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7938, 'learning_rate': 1.254e-05, 'epoch': 0.06} +{'loss': 7.7403, 'learning_rate': 1.26e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9862, 'learning_rate': 1.2659999999999999e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.776, 'learning_rate': 1.272e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7963, 'learning_rate': 1.278e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8802, 'learning_rate': 1.284e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8575, 'learning_rate': 1.29e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9569, 'learning_rate': 1.296e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.0029, 'learning_rate': 1.302e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8815, 'learning_rate': 1.308e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6815, 'learning_rate': 1.314e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8216, 'learning_rate': 1.32e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7761, 'learning_rate': 1.326e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 8.0696, 'learning_rate': 1.3320000000000001e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8774, 'learning_rate': 1.338e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7813, 'learning_rate': 1.344e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8391, 'learning_rate': 1.3500000000000001e-05, 'epoch': 0.06} +{'loss': 7.9505, 'learning_rate': 1.356e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6234, 'learning_rate': 1.362e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5466, 'learning_rate': 1.3680000000000001e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6627, 'learning_rate': 1.374e-05, 'epoch': 0.06} +{'loss': 7.662, 'learning_rate': 1.3800000000000002e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8887, 'learning_rate': 1.3860000000000001e-05, 'epoch': 0.06} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8448, 'learning_rate': 1.392e-05, 'epoch': 0.07} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7958, 'learning_rate': 1.3980000000000002e-05, 'epoch': 0.07} +{'loss': 7.4826, 'learning_rate': 1.4040000000000001e-05, 'epoch': 0.07} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5913, 'learning_rate': 1.4099999999999999e-05, 'epoch': 0.07} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7514, 'learning_rate': 1.416e-05, 'epoch': 0.07} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.661, 'learning_rate': 1.422e-05, 'epoch': 0.07} +{'loss': 7.5707, 'learning_rate': 1.428e-05, 'epoch': 0.07} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5866, 'learning_rate': 1.434e-05, 'epoch': 0.07} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6309, 'learning_rate': 1.44e-05, 'epoch': 0.07} +[WARNING|modeling_utils.py:388] 2022-03-02 09:51:06,670 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 242/10701 [07:53<4:51:17, 1.67s/it]g-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▋ | 242/10701 [07:53<4:51:17, 1.67s/it]g-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.9284, 'learning_rate': 1.452e-05, 'epoch': 0.07} +{'loss': 7.7646, 'learning_rate': 1.458e-05, 'epoch': 0.07} + 2%|█▋ | 242/10701 [07:53<4:51:17, 1.67s/it]g-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3227, 'learning_rate': 1.464e-05, 'epoch': 0.07} + 2%|█▋ | 242/10701 [07:53<4:51:17, 1.67s/it]g-point operations will not be computed-02 09:49:38,589 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5144, 'learning_rate': 1.47e-05, 'epoch': 0.07} + 2%|█▊ | 246/10701 [07:58<4:09:30, 1.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 246/10701 [07:58<4:09:30, 1.43s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6973, 'learning_rate': 1.488e-05, 'epoch': 0.07} +{'loss': 7.4415, 'learning_rate': 1.4940000000000001e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8044, 'learning_rate': 1.5e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7265, 'learning_rate': 1.506e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6512, 'learning_rate': 1.5120000000000001e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8493, 'learning_rate': 1.518e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4895, 'learning_rate': 1.524e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6161, 'learning_rate': 1.53e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.8947, 'learning_rate': 1.5360000000000002e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.7195, 'learning_rate': 1.542e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2944, 'learning_rate': 1.548e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.6675, 'learning_rate': 1.554e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.342, 'learning_rate': 1.56e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4145, 'learning_rate': 1.5660000000000003e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4339, 'learning_rate': 1.5720000000000002e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.723, 'learning_rate': 1.578e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3121, 'learning_rate': 1.584e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.458, 'learning_rate': 1.59e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.481, 'learning_rate': 1.596e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.423, 'learning_rate': 1.6020000000000002e-05, 'epoch': 0.07} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.452, 'learning_rate': 1.6080000000000002e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3395, 'learning_rate': 1.614e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5545, 'learning_rate': 1.62e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.297, 'learning_rate': 1.626e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5605, 'learning_rate': 1.6320000000000003e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3852, 'learning_rate': 1.6380000000000002e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3872, 'learning_rate': 1.6440000000000002e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1434, 'learning_rate': 1.65e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5026, 'learning_rate': 1.656e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3842, 'learning_rate': 1.6620000000000004e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4802, 'learning_rate': 1.6680000000000003e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3137, 'learning_rate': 1.6740000000000002e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1712, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2147, 'learning_rate': 1.686e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2708, 'learning_rate': 1.6919999999999997e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3019, 'learning_rate': 1.698e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3527, 'learning_rate': 1.704e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2426, 'learning_rate': 1.71e-05, 'epoch': 0.08} + 2%|█▊ | 248/10701 [08:01<3:50:12, 1.32s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.383, 'learning_rate': 1.7219999999999998e-05, 'epoch': 0.08} + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2301, 'learning_rate': 1.728e-05, 'epoch': 0.08} + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4012, 'learning_rate': 1.734e-05, 'epoch': 0.08} +{'loss': 7.4235, 'learning_rate': 1.74e-05, 'epoch': 0.08} + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.5482, 'learning_rate': 1.746e-05, 'epoch': 0.08} + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4363, 'learning_rate': 1.7519999999999998e-05, 'epoch': 0.08} + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2763, 'learning_rate': 1.758e-05, 'epoch': 0.08} +{'loss': 7.2757, 'learning_rate': 1.764e-05, 'epoch': 0.08} + 3%|██ | 287/10701 [09:21<5:20:54, 1.85s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:52:40,337 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 296/10701 [09:34<4:03:30, 1.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 296/10701 [09:34<4:03:30, 1.40s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2012, 'learning_rate': 1.776e-05, 'epoch': 0.08} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3281, 'learning_rate': 1.7879999999999998e-05, 'epoch': 0.08} +{'loss': 6.8298, 'learning_rate': 1.794e-05, 'epoch': 0.08} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.4168, 'learning_rate': 1.8e-05, 'epoch': 0.08} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2235, 'learning_rate': 1.806e-05, 'epoch': 0.08} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3229, 'learning_rate': 1.812e-05, 'epoch': 0.08} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2181, 'learning_rate': 1.818e-05, 'epoch': 0.08} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3975, 'learning_rate': 1.824e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2585, 'learning_rate': 1.83e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3365, 'learning_rate': 1.836e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.455, 'learning_rate': 1.842e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0969, 'learning_rate': 1.848e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3453, 'learning_rate': 1.854e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1884, 'learning_rate': 1.86e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3112, 'learning_rate': 1.866e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4935, 'learning_rate': 1.872e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3492, 'learning_rate': 1.878e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2252, 'learning_rate': 1.884e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2, 'learning_rate': 1.8900000000000002e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3023, 'learning_rate': 1.896e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0567, 'learning_rate': 1.902e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1876, 'learning_rate': 1.908e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3179, 'learning_rate': 1.914e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3303, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2902, 'learning_rate': 1.9260000000000002e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0884, 'learning_rate': 1.932e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2818, 'learning_rate': 1.938e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1258, 'learning_rate': 1.95e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0437, 'learning_rate': 1.9560000000000002e-05, 'epoch': 0.09} +{'loss': 7.192, 'learning_rate': 1.9620000000000002e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1372, 'learning_rate': 1.968e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0059, 'learning_rate': 1.974e-05, 'epoch': 0.09} +{'loss': 7.1942, 'learning_rate': 1.98e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1989, 'learning_rate': 1.9860000000000003e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0647, 'learning_rate': 1.9920000000000002e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1529, 'learning_rate': 1.9980000000000002e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0687, 'learning_rate': 2.004e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2183, 'learning_rate': 2.01e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9497, 'learning_rate': 2.016e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.095, 'learning_rate': 2.0220000000000003e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9722, 'learning_rate': 2.0280000000000002e-05, 'epoch': 0.09} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.302, 'learning_rate': 2.0340000000000002e-05, 'epoch': 0.1} + 3%|██▏ | 298/10701 [09:37<3:40:09, 1.27s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1676, 'learning_rate': 2.04e-05, 'epoch': 0.1} + 3%|██▍ | 342/10701 [11:05<4:34:25, 1.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▍ | 342/10701 [11:05<4:34:25, 1.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0249, 'learning_rate': 2.0520000000000003e-05, 'epoch': 0.1} + 3%|██▍ | 342/10701 [11:05<4:34:25, 1.59s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.105, 'learning_rate': 2.0580000000000003e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:50,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:50,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0939, 'learning_rate': 2.07e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:50,319 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9841, 'learning_rate': 2.0759999999999998e-05, 'epoch': 0.1} + 3%|██▌ | 348/10701 [11:13<3:40:47, 1.28s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 3%|██▌ | 348/10701 [11:13<3:40:47, 1.28s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6784, 'learning_rate': 2.088e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.648, 'learning_rate': 2.1e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1398, 'learning_rate': 2.1059999999999998e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2444, 'learning_rate': 2.1119999999999998e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1684, 'learning_rate': 2.118e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3969, 'learning_rate': 2.124e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9408, 'learning_rate': 2.13e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3996, 'learning_rate': 2.136e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9589, 'learning_rate': 2.1419999999999998e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2723, 'learning_rate': 2.148e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1407, 'learning_rate': 2.154e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.338, 'learning_rate': 2.16e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.4253, 'learning_rate': 2.166e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2386, 'learning_rate': 2.172e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2326, 'learning_rate': 2.178e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1585, 'learning_rate': 2.184e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2236, 'learning_rate': 2.19e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1333, 'learning_rate': 2.196e-05, 'epoch': 0.1} +{'loss': 7.2608, 'learning_rate': 2.202e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0539, 'learning_rate': 2.208e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2314, 'learning_rate': 2.214e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8448, 'learning_rate': 2.22e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1213, 'learning_rate': 2.226e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0645, 'learning_rate': 2.232e-05, 'epoch': 0.1} +{'loss': 7.2308, 'learning_rate': 2.238e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2085, 'learning_rate': 2.2440000000000002e-05, 'epoch': 0.1} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9134, 'learning_rate': 2.25e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0166, 'learning_rate': 2.256e-05, 'epoch': 0.11} +{'loss': 7.1703, 'learning_rate': 2.262e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6971, 'learning_rate': 2.268e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.084, 'learning_rate': 2.274e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2437, 'learning_rate': 2.2800000000000002e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2564, 'learning_rate': 2.286e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9667, 'learning_rate': 2.292e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9449, 'learning_rate': 2.298e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2372, 'learning_rate': 2.304e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9318, 'learning_rate': 2.3100000000000002e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1121, 'learning_rate': 2.3160000000000002e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9505, 'learning_rate': 2.322e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:55:56,461 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 389/10701 [12:37<5:18:40, 1.85s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 389/10701 [12:37<5:18:40, 1.85s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1731, 'learning_rate': 2.334e-05, 'epoch': 0.11} + 4%|██▊ | 389/10701 [12:37<5:18:40, 1.85s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0853, 'learning_rate': 2.3400000000000003e-05, 'epoch': 0.11} + 4%|██▊ | 389/10701 [12:37<5:18:40, 1.85s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8283, 'learning_rate': 2.3460000000000002e-05, 'epoch': 0.11} +{'loss': 7.2011, 'learning_rate': 2.3520000000000002e-05, 'epoch': 0.11} + 4%|██▊ | 389/10701 [12:37<5:18:40, 1.85s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 394/10701 [12:45<4:38:42, 1.62s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 394/10701 [12:45<4:38:42, 1.62s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9783, 'learning_rate': 2.364e-05, 'epoch': 0.11} + 4%|██▊ | 394/10701 [12:45<4:38:42, 1.62s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3072, 'learning_rate': 2.37e-05, 'epoch': 0.11} + 4%|██▊ | 397/10701 [12:49<4:00:48, 1.40s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|██▊ | 397/10701 [12:49<4:00:48, 1.40s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8423, 'learning_rate': 2.3820000000000002e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6825, 'learning_rate': 2.394e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3885, 'learning_rate': 2.4e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2664, 'learning_rate': 2.4060000000000003e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9272, 'learning_rate': 2.4120000000000003e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2954, 'learning_rate': 2.4180000000000002e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.88, 'learning_rate': 2.4240000000000002e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8738, 'learning_rate': 2.43e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.419, 'learning_rate': 2.4360000000000004e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1816, 'learning_rate': 2.442e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1283, 'learning_rate': 2.448e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2607, 'learning_rate': 2.454e-05, 'epoch': 0.11} +{'loss': 6.9234, 'learning_rate': 2.4599999999999998e-05, 'epoch': 0.11} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0308, 'learning_rate': 2.4659999999999998e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8637, 'learning_rate': 2.472e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9558, 'learning_rate': 2.478e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2137, 'learning_rate': 2.484e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1712, 'learning_rate': 2.49e-05, 'epoch': 0.12} +{'loss': 7.2463, 'learning_rate': 2.4959999999999998e-05, 'epoch': 0.12} +[WARNING|modeling_utils.py:388] 2022-03-02 09:57:32,729 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7192, 'learning_rate': 2.508e-05, 'epoch': 0.12} +{'loss': 7.0369, 'learning_rate': 2.514e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2517, 'learning_rate': 2.52e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1193, 'learning_rate': 2.526e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0365, 'learning_rate': 2.5319999999999998e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3968, 'learning_rate': 2.538e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0466, 'learning_rate': 2.544e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1708, 'learning_rate': 2.55e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2789, 'learning_rate': 2.556e-05, 'epoch': 0.12} +{'loss': 7.3759, 'learning_rate': 2.562e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1169, 'learning_rate': 2.568e-05, 'epoch': 0.12} + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 418/10701 [13:32<6:02:57, 2.12s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9739, 'learning_rate': 2.58e-05, 'epoch': 0.12} +{'loss': 6.8157, 'learning_rate': 2.586e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3529, 'learning_rate': 2.592e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2562, 'learning_rate': 2.5980000000000002e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.007, 'learning_rate': 2.604e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.115, 'learning_rate': 2.61e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3253, 'learning_rate': 2.616e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2258, 'learning_rate': 2.622e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.028, 'learning_rate': 2.628e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0361, 'learning_rate': 2.6340000000000002e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2074, 'learning_rate': 2.64e-05, 'epoch': 0.12} +{'loss': 7.1562, 'learning_rate': 2.646e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8889, 'learning_rate': 2.652e-05, 'epoch': 0.12} + 4%|███ | 430/10701 [13:57<5:46:39, 2.03s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8523, 'learning_rate': 2.658e-05, 'epoch': 0.12} + 4%|███▏ | 444/10701 [14:21<4:11:34, 1.47s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 444/10701 [14:21<4:11:34, 1.47s/it]g-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:59:04,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 09:59:04,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8937, 'learning_rate': 2.676e-05, 'epoch': 0.13} +{'loss': 6.3308, 'learning_rate': 2.682e-05, 'epoch': 0.13} +[WARNING|modeling_utils.py:388] 2022-03-02 09:59:04,909 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:54:16,551 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7587, 'learning_rate': 2.688e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5485, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2754, 'learning_rate': 2.7060000000000002e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0801, 'learning_rate': 2.712e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3503, 'learning_rate': 2.718e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0553, 'learning_rate': 2.724e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8619, 'learning_rate': 2.7300000000000003e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0834, 'learning_rate': 2.7360000000000002e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0272, 'learning_rate': 2.7420000000000002e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7911, 'learning_rate': 2.748e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8926, 'learning_rate': 2.754e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1745, 'learning_rate': 2.7600000000000003e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2602, 'learning_rate': 2.7660000000000003e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2217, 'learning_rate': 2.7720000000000002e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1336, 'learning_rate': 2.778e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9218, 'learning_rate': 2.784e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7562, 'learning_rate': 2.79e-05, 'epoch': 0.13} +{'loss': 6.696, 'learning_rate': 2.7960000000000003e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9161, 'learning_rate': 2.8020000000000003e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.025, 'learning_rate': 2.8080000000000002e-05, 'epoch': 0.13} +{'loss': 6.8767, 'learning_rate': 2.8139999999999998e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.207, 'learning_rate': 2.8199999999999998e-05, 'epoch': 0.13} +{'loss': 7.0644, 'learning_rate': 2.826e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.831, 'learning_rate': 2.832e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7322, 'learning_rate': 2.838e-05, 'epoch': 0.13} +{'loss': 7.0429, 'learning_rate': 2.844e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.999, 'learning_rate': 2.8499999999999998e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.259, 'learning_rate': 2.856e-05, 'epoch': 0.13} +{'loss': 7.141, 'learning_rate': 2.862e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9479, 'learning_rate': 2.868e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.2214, 'learning_rate': 2.874e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0184, 'learning_rate': 2.88e-05, 'epoch': 0.13} +{'loss': 7.1725, 'learning_rate': 2.8859999999999998e-05, 'epoch': 0.13} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9388, 'learning_rate': 2.892e-05, 'epoch': 0.14} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1595, 'learning_rate': 2.898e-05, 'epoch': 0.14} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9544, 'learning_rate': 2.904e-05, 'epoch': 0.14} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8867, 'learning_rate': 2.91e-05, 'epoch': 0.14} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0088, 'learning_rate': 2.916e-05, 'epoch': 0.14} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0532, 'learning_rate': 2.922e-05, 'epoch': 0.14} + 4%|███▏ | 449/10701 [14:27<3:22:42, 1.19s/it][WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + [WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9692, 'learning_rate': 2.934e-05, 'epoch': 0.14} +{'loss': 6.8579, 'learning_rate': 2.94e-05, 'epoch': 0.14} + [WARNING|modeling_utils.py:388] 2022-03-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8996, 'learning_rate': 2.946e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 10:00:36,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 10:00:36,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8518, 'learning_rate': 2.958e-05, 'epoch': 0.14} +{'loss': 7.1199, 'learning_rate': 2.964e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 10:00:36,663 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9929, 'learning_rate': 2.97e-05, 'epoch': 0.14} + 5%|███▌ | 496/10701 [15:59<3:43:05, 1.31s/it]g-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 5%|███▌ | 496/10701 [15:59<3:43:05, 1.31s/it]g-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5701, 'learning_rate': 2.982e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 10:00:42,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:388] 2022-03-02 10:00:42,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5625, 'learning_rate': 2.994e-05, 'epoch': 0.14} +[WARNING|modeling_utils.py:388] 2022-03-02 10:00:42,630 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +03/02/2022 10:07:58 - INFO - datasets.metric - Removing /home/sanchit_huggingface_co/.cache/huggingface/metrics/wer/default/default_experiment-1-0.arrow +{'eval_loss': 6.984083652496338, 'eval_wer': 0.9473381352064607, 'eval_runtime': 433.6165, 'eval_samples_per_second': 6.093, 'eval_steps_per_second': 1.524, 'epoch': 0.14} +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[INFO|trainer.py:2366] 2022-03-02 10:00:45,367 >> Num examples = 2642timate the number of tokens of the input, floating-point operations will not be computed-02 09:59:08,813 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed